github.com/neatlab/neatio@v1.7.3-0.20220425043230-d903e92fcc75/chain/core/bloombits/matcher.go (about) 1 // Copyright 2017 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package bloombits 18 19 import ( 20 "bytes" 21 "context" 22 "errors" 23 "math" 24 "sort" 25 "sync" 26 "sync/atomic" 27 "time" 28 29 "github.com/neatlab/neatio/utilities/common/bitutil" 30 "github.com/neatlab/neatio/utilities/crypto" 31 ) 32 33 // bloomIndexes represents the bit indexes inside the bloom filter that belong 34 // to some key. 35 type bloomIndexes [3]uint 36 37 // calcBloomIndexes returns the bloom filter bit indexes belonging to the given key. 38 func calcBloomIndexes(b []byte) bloomIndexes { 39 b = crypto.Keccak256(b) 40 41 var idxs bloomIndexes 42 for i := 0; i < len(idxs); i++ { 43 idxs[i] = (uint(b[2*i])<<8)&2047 + uint(b[2*i+1]) 44 } 45 return idxs 46 } 47 48 // partialMatches with a non-nil vector represents a section in which some sub- 49 // matchers have already found potential matches. Subsequent sub-matchers will 50 // binary AND their matches with this vector. If vector is nil, it represents a 51 // section to be processed by the first sub-matcher. 
type partialMatches struct {
	section uint64 // Index of the section the bitset belongs to
	bitset  []byte // Match bits of the section, indexed by block offset within the section
}

// Retrieval represents a request for retrieval task assignments for a given
// bit with the given number of fetch elements, or a response for such a request.
// It can also have the actual results set to be used as a delivery data struct.
//
// The context and error fields are used by the light client to terminate matching
// early if an error is encountered on some path of the pipeline.
type Retrieval struct {
	Bit      uint     // Bloom bit index the retrieval is about
	Sections []uint64 // Section indexes requested or delivered
	Bitsets  [][]byte // Bit vectors delivered, Bitsets[i] belonging to Sections[i]

	Context context.Context
	Error   error
}

// Matcher is a pipelined system of schedulers and logic matchers which perform
// binary AND/OR operations on the bit-streams, creating a stream of potential
// blocks to inspect for data content.
type Matcher struct {
	sectionSize uint64 // Size of the data batches to filter on

	filters    [][]bloomIndexes    // Filter the system is matching for
	schedulers map[uint]*scheduler // Retrieval schedulers for loading bloom bits

	retrievers chan chan uint       // Retriever processes waiting for bit allocations
	counters   chan chan uint       // Retriever processes waiting for task count reports
	retrievals chan chan *Retrieval // Retriever processes waiting for task allocations
	deliveries chan *Retrieval      // Retriever processes waiting for task response deliveries

	running uint32 // Atomic flag whether a session is live or not
}

// NewMatcher creates a new pipeline for retrieving bloom bit streams and doing
// address and topic filtering on them. Setting a filter component to `nil` is
// allowed and will result in that filter rule being skipped (OR 0x11...1).
92 func NewMatcher(sectionSize uint64, filters [][][]byte) *Matcher { 93 // Create the matcher instance 94 m := &Matcher{ 95 sectionSize: sectionSize, 96 schedulers: make(map[uint]*scheduler), 97 retrievers: make(chan chan uint), 98 counters: make(chan chan uint), 99 retrievals: make(chan chan *Retrieval), 100 deliveries: make(chan *Retrieval), 101 } 102 // Calculate the bloom bit indexes for the groups we're interested in 103 m.filters = nil 104 105 for _, filter := range filters { 106 // Gather the bit indexes of the filter rule, special casing the nil filter 107 if len(filter) == 0 { 108 continue 109 } 110 bloomBits := make([]bloomIndexes, len(filter)) 111 for i, clause := range filter { 112 if clause == nil { 113 bloomBits = nil 114 break 115 } 116 bloomBits[i] = calcBloomIndexes(clause) 117 } 118 // Accumulate the filter rules if no nil rule was within 119 if bloomBits != nil { 120 m.filters = append(m.filters, bloomBits) 121 } 122 } 123 // For every bit, create a scheduler to load/download the bit vectors 124 for _, bloomIndexLists := range m.filters { 125 for _, bloomIndexList := range bloomIndexLists { 126 for _, bloomIndex := range bloomIndexList { 127 m.addScheduler(bloomIndex) 128 } 129 } 130 } 131 return m 132 } 133 134 // addScheduler adds a bit stream retrieval scheduler for the given bit index if 135 // it has not existed before. If the bit is already selected for filtering, the 136 // existing scheduler can be used. 137 func (m *Matcher) addScheduler(idx uint) { 138 if _, ok := m.schedulers[idx]; ok { 139 return 140 } 141 m.schedulers[idx] = newScheduler(idx) 142 } 143 144 // Start starts the matching process and returns a stream of bloom matches in 145 // a given range of blocks. If there are no more matches in the range, the result 146 // channel is closed. 
147 func (m *Matcher) Start(ctx context.Context, begin, end uint64, results chan uint64) (*MatcherSession, error) { 148 // Make sure we're not creating concurrent sessions 149 if atomic.SwapUint32(&m.running, 1) == 1 { 150 return nil, errors.New("matcher already running") 151 } 152 defer atomic.StoreUint32(&m.running, 0) 153 154 // Initiate a new matching round 155 session := &MatcherSession{ 156 matcher: m, 157 quit: make(chan struct{}), 158 kill: make(chan struct{}), 159 ctx: ctx, 160 } 161 for _, scheduler := range m.schedulers { 162 scheduler.reset() 163 } 164 sink := m.run(begin, end, cap(results), session) 165 166 // Read the output from the result sink and deliver to the user 167 session.pend.Add(1) 168 go func() { 169 defer session.pend.Done() 170 defer close(results) 171 172 for { 173 select { 174 case <-session.quit: 175 return 176 177 case res, ok := <-sink: 178 // New match result found 179 if !ok { 180 return 181 } 182 // Calculate the first and last blocks of the section 183 sectionStart := res.section * m.sectionSize 184 185 first := sectionStart 186 if begin > first { 187 first = begin 188 } 189 last := sectionStart + m.sectionSize - 1 190 if end < last { 191 last = end 192 } 193 // Iterate over all the blocks in the section and return the matching ones 194 for i := first; i <= last; i++ { 195 // Skip the entire byte if no matches are found inside (and we're processing an entire byte!) 
196 next := res.bitset[(i-sectionStart)/8] 197 if next == 0 { 198 if i%8 == 0 { 199 i += 7 200 } 201 continue 202 } 203 // Some bit it set, do the actual submatching 204 if bit := 7 - i%8; next&(1<<bit) != 0 { 205 select { 206 case <-session.quit: 207 return 208 case results <- i: 209 } 210 } 211 } 212 } 213 } 214 }() 215 return session, nil 216 } 217 218 // run creates a daisy-chain of sub-matchers, one for the address set and one 219 // for each topic set, each sub-matcher receiving a section only if the previous 220 // ones have all found a potential match in one of the blocks of the section, 221 // then binary AND-ing its own matches and forwaring the result to the next one. 222 // 223 // The method starts feeding the section indexes into the first sub-matcher on a 224 // new goroutine and returns a sink channel receiving the results. 225 func (m *Matcher) run(begin, end uint64, buffer int, session *MatcherSession) chan *partialMatches { 226 // Create the source channel and feed section indexes into 227 source := make(chan *partialMatches, buffer) 228 229 session.pend.Add(1) 230 go func() { 231 defer session.pend.Done() 232 defer close(source) 233 234 for i := begin / m.sectionSize; i <= end/m.sectionSize; i++ { 235 select { 236 case <-session.quit: 237 return 238 case source <- &partialMatches{i, bytes.Repeat([]byte{0xff}, int(m.sectionSize/8))}: 239 } 240 } 241 }() 242 // Assemble the daisy-chained filtering pipeline 243 next := source 244 dist := make(chan *request, buffer) 245 246 for _, bloom := range m.filters { 247 next = m.subMatch(next, dist, bloom, session) 248 } 249 // Start the request distribution 250 session.pend.Add(1) 251 go m.distributor(dist, session) 252 253 return next 254 } 255 256 // subMatch creates a sub-matcher that filters for a set of addresses or topics, binary OR-s those matches, then 257 // binary AND-s the result to the daisy-chain input (source) and forwards it to the daisy-chain output. 
// The matches of each address/topic are calculated by fetching the given sections of the three bloom bit indexes belonging to
// that address/topic, and binary AND-ing those vectors together.
//
// Two goroutines are started per sub-matcher: the first forwards every incoming
// section index to the schedulers of all required bits and then hands the
// sub-result over on an internal channel; the second collects the bit vectors
// the schedulers deliver for that section, merges them, and emits the section
// on the returned channel only if the merged vector passes bitutil.TestBytes.
// Both goroutines are registered on session.pend and stop when session.quit is
// closed or their input channel is closed.
func (m *Matcher) subMatch(source chan *partialMatches, dist chan *request, bloom []bloomIndexes, session *MatcherSession) chan *partialMatches {
	// Start the concurrent schedulers for each bit required by the bloom filter
	sectionSources := make([][3]chan uint64, len(bloom))
	sectionSinks := make([][3]chan []byte, len(bloom))
	for i, bits := range bloom {
		for j, bit := range bits {
			// Every channel inherits the buffer size of the daisy-chain input
			sectionSources[i][j] = make(chan uint64, cap(source))
			sectionSinks[i][j] = make(chan []byte, cap(source))

			m.schedulers[bit].run(sectionSources[i][j], dist, sectionSinks[i][j], session.quit, &session.pend)
		}
	}

	process := make(chan *partialMatches, cap(source)) // entries from source are forwarded here after fetches have been initiated
	results := make(chan *partialMatches, cap(source))

	session.pend.Add(2)
	go func() {
		// Tear down the goroutine and terminate all source channels
		defer session.pend.Done()
		defer close(process)

		defer func() {
			for _, bloomSources := range sectionSources {
				for _, bitSource := range bloomSources {
					close(bitSource)
				}
			}
		}()
		// Read sections from the source channel and multiplex into all bit-schedulers
		for {
			select {
			case <-session.quit:
				return

			case subres, ok := <-source:
				// New subresult from previous link
				if !ok {
					return
				}
				// Multiplex the section index to all bit-schedulers
				for _, bloomSources := range sectionSources {
					for _, bitSource := range bloomSources {
						select {
						case <-session.quit:
							return
						case bitSource <- subres.section:
						}
					}
				}
				// Notify the processor that this section will become available
				select {
				case <-session.quit:
					return
				case process <- subres:
				}
			}
		}
	}()

	go func() {
		// Tear down the goroutine and terminate the final sink channel
		defer session.pend.Done()
		defer close(results)

		// Read the source notifications and collect the delivered results
		for {
			select {
			case <-session.quit:
				return

			case subres, ok := <-process:
				// Notified of a section being retrieved
				if !ok {
					return
				}
				// Gather all the sub-results and merge them together
				var orVector []byte
				for _, bloomSinks := range sectionSinks {
					// AND together the three bit vectors of this address/topic
					var andVector []byte
					for _, bitSink := range bloomSinks {
						var data []byte
						select {
						case <-session.quit:
							return
						case data = <-bitSink:
						}
						if andVector == nil {
							// First vector: copy it so the delivered data is never modified
							andVector = make([]byte, int(m.sectionSize/8))
							copy(andVector, data)
						} else {
							bitutil.ANDBytes(andVector, andVector, data)
						}
					}
					// OR the per-address/topic result into the accumulated vector
					if orVector == nil {
						orVector = andVector
					} else {
						bitutil.ORBytes(orVector, orVector, andVector)
					}
				}

				// No address/topic at all in this rule: fall back to an all-zero vector
				if orVector == nil {
					orVector = make([]byte, int(m.sectionSize/8))
				}
				// Restrict to the matches already found by the previous links
				if subres.bitset != nil {
					bitutil.ANDBytes(orVector, orVector, subres.bitset)
				}
				// Forward the section only if the check on the merged vector passes
				// (presumably "any bit set" - confirm against bitutil.TestBytes)
				if bitutil.TestBytes(orVector) {
					select {
					case <-session.quit:
						return
					case results <- &partialMatches{subres.section, orVector}:
					}
				}
			}
		}
	}()
	return results
}

// distributor receives requests from the schedulers and queues them into a set
// of pending requests, which are assigned to retrievers wanting to fulfil them.
func (m *Matcher) distributor(dist chan *request, session *MatcherSession) {
	defer session.pend.Done()

	var (
		requests   = make(map[uint][]uint64) // Per-bit list of section requests, ordered by section number
		unallocs   = make(map[uint]struct{}) // Bits with pending requests but not allocated to any retriever
		retrievers chan chan uint            // Waiting retrievers (toggled to nil if unallocs is empty)
	)
	var (
		allocs   int            // Number of active allocations to handle graceful shutdown requests
		shutdown = session.quit // Shutdown request channel, will gracefully wait for pending requests
	)

	// assign is a helper method to try to assign a pending bit to an actively
	// listening servicer, or schedule it up for later when one arrives.
	assign := func(bit uint) {
		select {
		case fetcher := <-m.retrievers:
			allocs++
			fetcher <- bit
		default:
			// No retrievers active, start listening for new ones
			retrievers = m.retrievers
			unallocs[bit] = struct{}{}
		}
	}

	for {
		select {
		case <-shutdown:
			// Graceful shutdown requested, wait until all pending requests are honoured
			if allocs == 0 {
				return
			}
			// Disable this case (a nil channel is never ready) and keep serving
			// until the outstanding allocations have been delivered
			shutdown = nil

		case <-session.kill:
			// Pending requests not honoured in time, hard terminate
			return

		case req := <-dist:
			// New retrieval request arrived to be distributed to some fetcher process
			queue := requests[req.bit]
			// Insert the section at its sorted position within the bit's queue
			index := sort.Search(len(queue), func(i int) bool { return queue[i] >= req.section })
			requests[req.bit] = append(queue[:index], append([]uint64{req.section}, queue[index:]...)...)

			// If it's a new bit and we have waiting fetchers, allocate to them
			if len(queue) == 0 {
				assign(req.bit)
			}

		case fetcher := <-retrievers:
			// New retriever arrived, find the lowest section-ed bit to assign
			bit, best := uint(0), uint64(math.MaxUint64)
			for idx := range unallocs {
				if requests[idx][0] < best {
					bit, best = idx, requests[idx][0]
				}
			}
			// Stop tracking this bit (and alloc notifications if no more work is available)
			delete(unallocs, bit)
			if len(unallocs) == 0 {
				retrievers = nil
			}
			allocs++
			fetcher <- bit

		case fetcher := <-m.counters:
			// New task count request arrives, return number of items. The bit of
			// interest is read from the same channel the count is replied on.
			fetcher <- uint(len(requests[<-fetcher]))

		case fetcher := <-m.retrievals:
			// New fetcher waiting for tasks to retrieve, assign
			task := <-fetcher
			if want := len(task.Sections); want >= len(requests[task.Bit]) {
				// Everything pending fits: hand over the entire queue
				task.Sections = requests[task.Bit]
				delete(requests, task.Bit)
			} else {
				// Hand over the first want sections and keep the rest queued
				task.Sections = append(task.Sections[:0], requests[task.Bit][:want]...)
				requests[task.Bit] = append(requests[task.Bit][:0], requests[task.Bit][want:]...)
			}
			fetcher <- task

			// If anything was left unallocated, try to assign to someone else
			if len(requests[task.Bit]) > 0 {
				assign(task.Bit)
			}

		case result := <-m.deliveries:
			// New retrieval task response from fetcher, split out missing sections and
			// deliver complete ones
			var (
				sections = make([]uint64, 0, len(result.Sections))
				bitsets  = make([][]byte, 0, len(result.Bitsets))
				missing  = make([]uint64, 0, len(result.Sections))
			)
			for i, bitset := range result.Bitsets {
				// An empty bitset marks a section that was not retrieved
				if len(bitset) == 0 {
					missing = append(missing, result.Sections[i])
					continue
				}
				sections = append(sections, result.Sections[i])
				bitsets = append(bitsets, bitset)
			}
			m.schedulers[result.Bit].deliver(sections, bitsets)
			allocs--

			// Reschedule missing sections and allocate bit if newly available
			if len(missing) > 0 {
				queue := requests[result.Bit]
				for _, section := range missing {
					index := sort.Search(len(queue), func(i int) bool { return queue[i] >= section })
					queue = append(queue[:index], append([]uint64{section}, queue[index:]...)...)
				}
				requests[result.Bit] = queue

				// The queue was empty before the reinsertion, so the bit has no
				// retriever assigned yet
				if len(queue) == len(missing) {
					assign(result.Bit)
				}
			}
			// If we're in the process of shutting down, terminate
			if allocs == 0 && shutdown == nil {
				return
			}
		}
	}
}

// MatcherSession is returned by a started matcher to be used as a terminator
// for the actively running matching operation.
512 type MatcherSession struct { 513 matcher *Matcher 514 515 closer sync.Once // Sync object to ensure we only ever close once 516 quit chan struct{} // Quit channel to request pipeline termination 517 kill chan struct{} // Term channel to signal non-graceful forced shutdown 518 519 ctx context.Context // Context used by the light client to abort filtering 520 err atomic.Value // Global error to track retrieval failures deep in the chain 521 522 pend sync.WaitGroup 523 } 524 525 // Close stops the matching process and waits for all subprocesses to terminate 526 // before returning. The timeout may be used for graceful shutdown, allowing the 527 // currently running retrievals to complete before this time. 528 func (s *MatcherSession) Close() { 529 s.closer.Do(func() { 530 // Signal termination and wait for all goroutines to tear down 531 close(s.quit) 532 time.AfterFunc(time.Second, func() { close(s.kill) }) 533 s.pend.Wait() 534 }) 535 } 536 537 // Error returns any failure encountered during the matching session. 538 func (s *MatcherSession) Error() error { 539 if err := s.err.Load(); err != nil { 540 return err.(error) 541 } 542 return nil 543 } 544 545 // AllocateRetrieval assigns a bloom bit index to a client process that can either 546 // immediately reuest and fetch the section contents assigned to this bit or wait 547 // a little while for more sections to be requested. 548 func (s *MatcherSession) AllocateRetrieval() (uint, bool) { 549 fetcher := make(chan uint) 550 551 select { 552 case <-s.quit: 553 return 0, false 554 case s.matcher.retrievers <- fetcher: 555 bit, ok := <-fetcher 556 return bit, ok 557 } 558 } 559 560 // PendingSections returns the number of pending section retrievals belonging to 561 // the given bloom bit index. 
562 func (s *MatcherSession) PendingSections(bit uint) int { 563 fetcher := make(chan uint) 564 565 select { 566 case <-s.quit: 567 return 0 568 case s.matcher.counters <- fetcher: 569 fetcher <- bit 570 return int(<-fetcher) 571 } 572 } 573 574 // AllocateSections assigns all or part of an already allocated bit-task queue 575 // to the requesting process. 576 func (s *MatcherSession) AllocateSections(bit uint, count int) []uint64 { 577 fetcher := make(chan *Retrieval) 578 579 select { 580 case <-s.quit: 581 return nil 582 case s.matcher.retrievals <- fetcher: 583 task := &Retrieval{ 584 Bit: bit, 585 Sections: make([]uint64, count), 586 } 587 fetcher <- task 588 return (<-fetcher).Sections 589 } 590 } 591 592 // DeliverSections delivers a batch of section bit-vectors for a specific bloom 593 // bit index to be injected into the processing pipeline. 594 func (s *MatcherSession) DeliverSections(bit uint, sections []uint64, bitsets [][]byte) { 595 select { 596 case <-s.kill: 597 return 598 case s.matcher.deliveries <- &Retrieval{Bit: bit, Sections: sections, Bitsets: bitsets}: 599 } 600 } 601 602 // Multiplex polls the matcher session for rerieval tasks and multiplexes it into 603 // the reuested retrieval queue to be serviced together with other sessions. 604 // 605 // This method will block for the lifetime of the session. Even after termination 606 // of the session, any request in-flight need to be responded to! Empty responses 607 // are fine though in that case. 
608 func (s *MatcherSession) Multiplex(batch int, wait time.Duration, mux chan chan *Retrieval) { 609 for { 610 // Allocate a new bloom bit index to retrieve data for, stopping when done 611 bit, ok := s.AllocateRetrieval() 612 if !ok { 613 return 614 } 615 // Bit allocated, throttle a bit if we're below our batch limit 616 if s.PendingSections(bit) < batch { 617 select { 618 case <-s.quit: 619 // Session terminating, we can't meaningfully service, abort 620 s.AllocateSections(bit, 0) 621 s.DeliverSections(bit, []uint64{}, [][]byte{}) 622 return 623 624 case <-time.After(wait): 625 // Throttling up, fetch whatever's available 626 } 627 } 628 // Allocate as much as we can handle and request servicing 629 sections := s.AllocateSections(bit, batch) 630 request := make(chan *Retrieval) 631 632 select { 633 case <-s.quit: 634 // Session terminating, we can't meaningfully service, abort 635 s.DeliverSections(bit, sections, make([][]byte, len(sections))) 636 return 637 638 case mux <- request: 639 // Retrieval accepted, something must arrive before we're aborting 640 request <- &Retrieval{Bit: bit, Sections: sections, Context: s.ctx} 641 642 result := <-request 643 if result.Error != nil { 644 s.err.Store(result.Error) 645 s.Close() 646 } 647 s.DeliverSections(result.Bit, result.Sections, result.Bitsets) 648 } 649 } 650 }