github.com/jimmyx0x/go-ethereum@v1.10.28/core/bloombits/matcher.go (about) 1 // Copyright 2017 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package bloombits 18 19 import ( 20 "bytes" 21 "context" 22 "errors" 23 "math" 24 "sort" 25 "sync" 26 "sync/atomic" 27 "time" 28 29 "github.com/ethereum/go-ethereum/common/bitutil" 30 "github.com/ethereum/go-ethereum/crypto" 31 ) 32 33 // bloomIndexes represents the bit indexes inside the bloom filter that belong 34 // to some key. 35 type bloomIndexes [3]uint 36 37 // calcBloomIndexes returns the bloom filter bit indexes belonging to the given key. 38 func calcBloomIndexes(b []byte) bloomIndexes { 39 b = crypto.Keccak256(b) 40 41 var idxs bloomIndexes 42 for i := 0; i < len(idxs); i++ { 43 idxs[i] = (uint(b[2*i])<<8)&2047 + uint(b[2*i+1]) 44 } 45 return idxs 46 } 47 48 // partialMatches with a non-nil vector represents a section in which some sub- 49 // matchers have already found potential matches. Subsequent sub-matchers will 50 // binary AND their matches with this vector. If vector is nil, it represents a 51 // section to be processed by the first sub-matcher. 52 type partialMatches struct { 53 section uint64 54 bitset []byte 55 } 56 57 // Retrieval represents a request for retrieval task assignments for a given 58 // bit with the given number of fetch elements, or a response for such a request. 59 // It can also have the actual results set to be used as a delivery data struct. 60 // 61 // The contest and error fields are used by the light client to terminate matching 62 // early if an error is encountered on some path of the pipeline. 63 type Retrieval struct { 64 Bit uint 65 Sections []uint64 66 Bitsets [][]byte 67 68 Context context.Context 69 Error error 70 } 71 72 // Matcher is a pipelined system of schedulers and logic matchers which perform 73 // binary AND/OR operations on the bit-streams, creating a stream of potential 74 // blocks to inspect for data content. 75 type Matcher struct { 76 sectionSize uint64 // Size of the data batches to filter on 77 78 filters [][]bloomIndexes // Filter the system is matching for 79 schedulers map[uint]*scheduler // Retrieval schedulers for loading bloom bits 80 81 retrievers chan chan uint // Retriever processes waiting for bit allocations 82 counters chan chan uint // Retriever processes waiting for task count reports 83 retrievals chan chan *Retrieval // Retriever processes waiting for task allocations 84 deliveries chan *Retrieval // Retriever processes waiting for task response deliveries 85 86 running uint32 // Atomic flag whether a session is live or not 87 } 88 89 // NewMatcher creates a new pipeline for retrieving bloom bit streams and doing 90 // address and topic filtering on them. Setting a filter component to `nil` is 91 // allowed and will result in that filter rule being skipped (OR 0x11...1). 92 func NewMatcher(sectionSize uint64, filters [][][]byte) *Matcher { 93 // Create the matcher instance 94 m := &Matcher{ 95 sectionSize: sectionSize, 96 schedulers: make(map[uint]*scheduler), 97 retrievers: make(chan chan uint), 98 counters: make(chan chan uint), 99 retrievals: make(chan chan *Retrieval), 100 deliveries: make(chan *Retrieval), 101 } 102 // Calculate the bloom bit indexes for the groups we're interested in 103 m.filters = nil 104 105 for _, filter := range filters { 106 // Gather the bit indexes of the filter rule, special casing the nil filter 107 if len(filter) == 0 { 108 continue 109 } 110 bloomBits := make([]bloomIndexes, len(filter)) 111 for i, clause := range filter { 112 if clause == nil { 113 bloomBits = nil 114 break 115 } 116 bloomBits[i] = calcBloomIndexes(clause) 117 } 118 // Accumulate the filter rules if no nil rule was within 119 if bloomBits != nil { 120 m.filters = append(m.filters, bloomBits) 121 } 122 } 123 // For every bit, create a scheduler to load/download the bit vectors 124 for _, bloomIndexLists := range m.filters { 125 for _, bloomIndexList := range bloomIndexLists { 126 for _, bloomIndex := range bloomIndexList { 127 m.addScheduler(bloomIndex) 128 } 129 } 130 } 131 return m 132 } 133 134 // addScheduler adds a bit stream retrieval scheduler for the given bit index if 135 // it has not existed before. If the bit is already selected for filtering, the 136 // existing scheduler can be used. 137 func (m *Matcher) addScheduler(idx uint) { 138 if _, ok := m.schedulers[idx]; ok { 139 return 140 } 141 m.schedulers[idx] = newScheduler(idx) 142 } 143 144 // Start starts the matching process and returns a stream of bloom matches in 145 // a given range of blocks. If there are no more matches in the range, the result 146 // channel is closed. 147 func (m *Matcher) Start(ctx context.Context, begin, end uint64, results chan uint64) (*MatcherSession, error) { 148 // Make sure we're not creating concurrent sessions 149 if atomic.SwapUint32(&m.running, 1) == 1 { 150 return nil, errors.New("matcher already running") 151 } 152 defer atomic.StoreUint32(&m.running, 0) 153 154 // Initiate a new matching round 155 session := &MatcherSession{ 156 matcher: m, 157 quit: make(chan struct{}), 158 ctx: ctx, 159 } 160 for _, scheduler := range m.schedulers { 161 scheduler.reset() 162 } 163 sink := m.run(begin, end, cap(results), session) 164 165 // Read the output from the result sink and deliver to the user 166 session.pend.Add(1) 167 go func() { 168 defer session.pend.Done() 169 defer close(results) 170 171 for { 172 select { 173 case <-session.quit: 174 return 175 176 case res, ok := <-sink: 177 // New match result found 178 if !ok { 179 return 180 } 181 // Calculate the first and last blocks of the section 182 sectionStart := res.section * m.sectionSize 183 184 first := sectionStart 185 if begin > first { 186 first = begin 187 } 188 last := sectionStart + m.sectionSize - 1 189 if end < last { 190 last = end 191 } 192 // Iterate over all the blocks in the section and return the matching ones 193 for i := first; i <= last; i++ { 194 // Skip the entire byte if no matches are found inside (and we're processing an entire byte!) 195 next := res.bitset[(i-sectionStart)/8] 196 if next == 0 { 197 if i%8 == 0 { 198 i += 7 199 } 200 continue 201 } 202 // Some bit it set, do the actual submatching 203 if bit := 7 - i%8; next&(1<<bit) != 0 { 204 select { 205 case <-session.quit: 206 return 207 case results <- i: 208 } 209 } 210 } 211 } 212 } 213 }() 214 return session, nil 215 } 216 217 // run creates a daisy-chain of sub-matchers, one for the address set and one 218 // for each topic set, each sub-matcher receiving a section only if the previous 219 // ones have all found a potential match in one of the blocks of the section, 220 // then binary AND-ing its own matches and forwarding the result to the next one. 221 // 222 // The method starts feeding the section indexes into the first sub-matcher on a 223 // new goroutine and returns a sink channel receiving the results. 224 func (m *Matcher) run(begin, end uint64, buffer int, session *MatcherSession) chan *partialMatches { 225 // Create the source channel and feed section indexes into 226 source := make(chan *partialMatches, buffer) 227 228 session.pend.Add(1) 229 go func() { 230 defer session.pend.Done() 231 defer close(source) 232 233 for i := begin / m.sectionSize; i <= end/m.sectionSize; i++ { 234 select { 235 case <-session.quit: 236 return 237 case source <- &partialMatches{i, bytes.Repeat([]byte{0xff}, int(m.sectionSize/8))}: 238 } 239 } 240 }() 241 // Assemble the daisy-chained filtering pipeline 242 next := source 243 dist := make(chan *request, buffer) 244 245 for _, bloom := range m.filters { 246 next = m.subMatch(next, dist, bloom, session) 247 } 248 // Start the request distribution 249 session.pend.Add(1) 250 go m.distributor(dist, session) 251 252 return next 253 } 254 255 // subMatch creates a sub-matcher that filters for a set of addresses or topics, binary OR-s those matches, then 256 // binary AND-s the result to the daisy-chain input (source) and forwards it to the daisy-chain output. 257 // The matches of each address/topic are calculated by fetching the given sections of the three bloom bit indexes belonging to 258 // that address/topic, and binary AND-ing those vectors together. 259 func (m *Matcher) subMatch(source chan *partialMatches, dist chan *request, bloom []bloomIndexes, session *MatcherSession) chan *partialMatches { 260 // Start the concurrent schedulers for each bit required by the bloom filter 261 sectionSources := make([][3]chan uint64, len(bloom)) 262 sectionSinks := make([][3]chan []byte, len(bloom)) 263 for i, bits := range bloom { 264 for j, bit := range bits { 265 sectionSources[i][j] = make(chan uint64, cap(source)) 266 sectionSinks[i][j] = make(chan []byte, cap(source)) 267 268 m.schedulers[bit].run(sectionSources[i][j], dist, sectionSinks[i][j], session.quit, &session.pend) 269 } 270 } 271 272 process := make(chan *partialMatches, cap(source)) // entries from source are forwarded here after fetches have been initiated 273 results := make(chan *partialMatches, cap(source)) 274 275 session.pend.Add(2) 276 go func() { 277 // Tear down the goroutine and terminate all source channels 278 defer session.pend.Done() 279 defer close(process) 280 281 defer func() { 282 for _, bloomSources := range sectionSources { 283 for _, bitSource := range bloomSources { 284 close(bitSource) 285 } 286 } 287 }() 288 // Read sections from the source channel and multiplex into all bit-schedulers 289 for { 290 select { 291 case <-session.quit: 292 return 293 294 case subres, ok := <-source: 295 // New subresult from previous link 296 if !ok { 297 return 298 } 299 // Multiplex the section index to all bit-schedulers 300 for _, bloomSources := range sectionSources { 301 for _, bitSource := range bloomSources { 302 select { 303 case <-session.quit: 304 return 305 case bitSource <- subres.section: 306 } 307 } 308 } 309 // Notify the processor that this section will become available 310 select { 311 case <-session.quit: 312 return 313 case process <- subres: 314 } 315 } 316 } 317 }() 318 319 go func() { 320 // Tear down the goroutine and terminate the final sink channel 321 defer session.pend.Done() 322 defer close(results) 323 324 // Read the source notifications and collect the delivered results 325 for { 326 select { 327 case <-session.quit: 328 return 329 330 case subres, ok := <-process: 331 // Notified of a section being retrieved 332 if !ok { 333 return 334 } 335 // Gather all the sub-results and merge them together 336 var orVector []byte 337 for _, bloomSinks := range sectionSinks { 338 var andVector []byte 339 for _, bitSink := range bloomSinks { 340 var data []byte 341 select { 342 case <-session.quit: 343 return 344 case data = <-bitSink: 345 } 346 if andVector == nil { 347 andVector = make([]byte, int(m.sectionSize/8)) 348 copy(andVector, data) 349 } else { 350 bitutil.ANDBytes(andVector, andVector, data) 351 } 352 } 353 if orVector == nil { 354 orVector = andVector 355 } else { 356 bitutil.ORBytes(orVector, orVector, andVector) 357 } 358 } 359 360 if orVector == nil { 361 orVector = make([]byte, int(m.sectionSize/8)) 362 } 363 if subres.bitset != nil { 364 bitutil.ANDBytes(orVector, orVector, subres.bitset) 365 } 366 if bitutil.TestBytes(orVector) { 367 select { 368 case <-session.quit: 369 return 370 case results <- &partialMatches{subres.section, orVector}: 371 } 372 } 373 } 374 } 375 }() 376 return results 377 } 378 379 // distributor receives requests from the schedulers and queues them into a set 380 // of pending requests, which are assigned to retrievers wanting to fulfil them. 381 func (m *Matcher) distributor(dist chan *request, session *MatcherSession) { 382 defer session.pend.Done() 383 384 var ( 385 requests = make(map[uint][]uint64) // Per-bit list of section requests, ordered by section number 386 unallocs = make(map[uint]struct{}) // Bits with pending requests but not allocated to any retriever 387 retrievers chan chan uint // Waiting retrievers (toggled to nil if unallocs is empty) 388 allocs int // Number of active allocations to handle graceful shutdown requests 389 shutdown = session.quit // Shutdown request channel, will gracefully wait for pending requests 390 ) 391 392 // assign is a helper method fo try to assign a pending bit an actively 393 // listening servicer, or schedule it up for later when one arrives. 394 assign := func(bit uint) { 395 select { 396 case fetcher := <-m.retrievers: 397 allocs++ 398 fetcher <- bit 399 default: 400 // No retrievers active, start listening for new ones 401 retrievers = m.retrievers 402 unallocs[bit] = struct{}{} 403 } 404 } 405 406 for { 407 select { 408 case <-shutdown: 409 // Shutdown requested. No more retrievers can be allocated, 410 // but we still need to wait until all pending requests have returned. 411 shutdown = nil 412 if allocs == 0 { 413 return 414 } 415 416 case req := <-dist: 417 // New retrieval request arrived to be distributed to some fetcher process 418 queue := requests[req.bit] 419 index := sort.Search(len(queue), func(i int) bool { return queue[i] >= req.section }) 420 requests[req.bit] = append(queue[:index], append([]uint64{req.section}, queue[index:]...)...) 421 422 // If it's a new bit and we have waiting fetchers, allocate to them 423 if len(queue) == 0 { 424 assign(req.bit) 425 } 426 427 case fetcher := <-retrievers: 428 // New retriever arrived, find the lowest section-ed bit to assign 429 bit, best := uint(0), uint64(math.MaxUint64) 430 for idx := range unallocs { 431 if requests[idx][0] < best { 432 bit, best = idx, requests[idx][0] 433 } 434 } 435 // Stop tracking this bit (and alloc notifications if no more work is available) 436 delete(unallocs, bit) 437 if len(unallocs) == 0 { 438 retrievers = nil 439 } 440 allocs++ 441 fetcher <- bit 442 443 case fetcher := <-m.counters: 444 // New task count request arrives, return number of items 445 fetcher <- uint(len(requests[<-fetcher])) 446 447 case fetcher := <-m.retrievals: 448 // New fetcher waiting for tasks to retrieve, assign 449 task := <-fetcher 450 if want := len(task.Sections); want >= len(requests[task.Bit]) { 451 task.Sections = requests[task.Bit] 452 delete(requests, task.Bit) 453 } else { 454 task.Sections = append(task.Sections[:0], requests[task.Bit][:want]...) 455 requests[task.Bit] = append(requests[task.Bit][:0], requests[task.Bit][want:]...) 456 } 457 fetcher <- task 458 459 // If anything was left unallocated, try to assign to someone else 460 if len(requests[task.Bit]) > 0 { 461 assign(task.Bit) 462 } 463 464 case result := <-m.deliveries: 465 // New retrieval task response from fetcher, split out missing sections and 466 // deliver complete ones 467 var ( 468 sections = make([]uint64, 0, len(result.Sections)) 469 bitsets = make([][]byte, 0, len(result.Bitsets)) 470 missing = make([]uint64, 0, len(result.Sections)) 471 ) 472 for i, bitset := range result.Bitsets { 473 if len(bitset) == 0 { 474 missing = append(missing, result.Sections[i]) 475 continue 476 } 477 sections = append(sections, result.Sections[i]) 478 bitsets = append(bitsets, bitset) 479 } 480 m.schedulers[result.Bit].deliver(sections, bitsets) 481 allocs-- 482 483 // Reschedule missing sections and allocate bit if newly available 484 if len(missing) > 0 { 485 queue := requests[result.Bit] 486 for _, section := range missing { 487 index := sort.Search(len(queue), func(i int) bool { return queue[i] >= section }) 488 queue = append(queue[:index], append([]uint64{section}, queue[index:]...)...) 489 } 490 requests[result.Bit] = queue 491 492 if len(queue) == len(missing) { 493 assign(result.Bit) 494 } 495 } 496 497 // End the session when all pending deliveries have arrived. 498 if shutdown == nil && allocs == 0 { 499 return 500 } 501 } 502 } 503 } 504 505 // MatcherSession is returned by a started matcher to be used as a terminator 506 // for the actively running matching operation. 507 type MatcherSession struct { 508 matcher *Matcher 509 510 closer sync.Once // Sync object to ensure we only ever close once 511 quit chan struct{} // Quit channel to request pipeline termination 512 513 ctx context.Context // Context used by the light client to abort filtering 514 err error // Global error to track retrieval failures deep in the chain 515 errLock sync.Mutex 516 517 pend sync.WaitGroup 518 } 519 520 // Close stops the matching process and waits for all subprocesses to terminate 521 // before returning. The timeout may be used for graceful shutdown, allowing the 522 // currently running retrievals to complete before this time. 523 func (s *MatcherSession) Close() { 524 s.closer.Do(func() { 525 // Signal termination and wait for all goroutines to tear down 526 close(s.quit) 527 s.pend.Wait() 528 }) 529 } 530 531 // Error returns any failure encountered during the matching session. 532 func (s *MatcherSession) Error() error { 533 s.errLock.Lock() 534 defer s.errLock.Unlock() 535 536 return s.err 537 } 538 539 // allocateRetrieval assigns a bloom bit index to a client process that can either 540 // immediately request and fetch the section contents assigned to this bit or wait 541 // a little while for more sections to be requested. 542 func (s *MatcherSession) allocateRetrieval() (uint, bool) { 543 fetcher := make(chan uint) 544 545 select { 546 case <-s.quit: 547 return 0, false 548 case s.matcher.retrievers <- fetcher: 549 bit, ok := <-fetcher 550 return bit, ok 551 } 552 } 553 554 // pendingSections returns the number of pending section retrievals belonging to 555 // the given bloom bit index. 556 func (s *MatcherSession) pendingSections(bit uint) int { 557 fetcher := make(chan uint) 558 559 select { 560 case <-s.quit: 561 return 0 562 case s.matcher.counters <- fetcher: 563 fetcher <- bit 564 return int(<-fetcher) 565 } 566 } 567 568 // allocateSections assigns all or part of an already allocated bit-task queue 569 // to the requesting process. 570 func (s *MatcherSession) allocateSections(bit uint, count int) []uint64 { 571 fetcher := make(chan *Retrieval) 572 573 select { 574 case <-s.quit: 575 return nil 576 case s.matcher.retrievals <- fetcher: 577 task := &Retrieval{ 578 Bit: bit, 579 Sections: make([]uint64, count), 580 } 581 fetcher <- task 582 return (<-fetcher).Sections 583 } 584 } 585 586 // deliverSections delivers a batch of section bit-vectors for a specific bloom 587 // bit index to be injected into the processing pipeline. 588 func (s *MatcherSession) deliverSections(bit uint, sections []uint64, bitsets [][]byte) { 589 s.matcher.deliveries <- &Retrieval{Bit: bit, Sections: sections, Bitsets: bitsets} 590 } 591 592 // Multiplex polls the matcher session for retrieval tasks and multiplexes it into 593 // the requested retrieval queue to be serviced together with other sessions. 594 // 595 // This method will block for the lifetime of the session. Even after termination 596 // of the session, any request in-flight need to be responded to! Empty responses 597 // are fine though in that case. 598 func (s *MatcherSession) Multiplex(batch int, wait time.Duration, mux chan chan *Retrieval) { 599 for { 600 // Allocate a new bloom bit index to retrieve data for, stopping when done 601 bit, ok := s.allocateRetrieval() 602 if !ok { 603 return 604 } 605 // Bit allocated, throttle a bit if we're below our batch limit 606 if s.pendingSections(bit) < batch { 607 select { 608 case <-s.quit: 609 // Session terminating, we can't meaningfully service, abort 610 s.allocateSections(bit, 0) 611 s.deliverSections(bit, []uint64{}, [][]byte{}) 612 return 613 614 case <-time.After(wait): 615 // Throttling up, fetch whatever is available 616 } 617 } 618 // Allocate as much as we can handle and request servicing 619 sections := s.allocateSections(bit, batch) 620 request := make(chan *Retrieval) 621 622 select { 623 case <-s.quit: 624 // Session terminating, we can't meaningfully service, abort 625 s.deliverSections(bit, sections, make([][]byte, len(sections))) 626 return 627 628 case mux <- request: 629 // Retrieval accepted, something must arrive before we're aborting 630 request <- &Retrieval{Bit: bit, Sections: sections, Context: s.ctx} 631 632 result := <-request 633 if result.Error != nil { 634 s.errLock.Lock() 635 s.err = result.Error 636 s.errLock.Unlock() 637 s.Close() 638 } 639 s.deliverSections(result.Bit, result.Sections, result.Bitsets) 640 } 641 } 642 }