github.com/klaytn/klaytn@v1.12.1/blockchain/bloombits/matcher.go (about) 1 // Modifications Copyright 2018 The klaytn Authors 2 // Copyright 2017 The go-ethereum Authors 3 // This file is part of the go-ethereum library. 4 // 5 // The go-ethereum library is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Lesser General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // The go-ethereum library is distributed in the hope that it will be useful, 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Lesser General Public License for more details. 14 // 15 // You should have received a copy of the GNU Lesser General Public License 16 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 17 // 18 // This file is derived from core/bloombits/matcher.go (2018/06/04). 19 // Modified and improved for the klaytn development. 20 21 package bloombits 22 23 import ( 24 "bytes" 25 "context" 26 "errors" 27 "math" 28 "sort" 29 "sync" 30 "sync/atomic" 31 "time" 32 33 "github.com/klaytn/klaytn/common/bitutil" 34 "github.com/klaytn/klaytn/crypto" 35 ) 36 37 // bloomIndexes represents the bit indexes inside the bloom filter that belong 38 // to some key. 39 type bloomIndexes [3]uint 40 41 // calcBloomIndexes returns the bloom filter bit indexes belonging to the given key. 42 func calcBloomIndexes(b []byte) bloomIndexes { 43 b = crypto.Keccak256(b) 44 45 var idxs bloomIndexes 46 for i := 0; i < len(idxs); i++ { 47 idxs[i] = (uint(b[2*i])<<8)&2047 + uint(b[2*i+1]) 48 } 49 return idxs 50 } 51 52 // partialMatches with a non-nil vector represents a section in which some sub- 53 // matchers have already found potential matches. Subsequent sub-matchers will 54 // binary AND their matches with this vector. If vector is nil, it represents a 55 // section to be processed by the first sub-matcher. 56 type partialMatches struct { 57 section uint64 58 bitset []byte 59 } 60 61 // Retrieval represents a request for retrieval task assignments for a given 62 // bit with the given number of fetch elements, or a response for such a request. 63 // It can also have the actual results set to be used as a delivery data struct. 64 // 65 // The contest and error fields are used by the light client to terminate matching 66 // early if an error is encountered on some path of the pipeline. 67 type Retrieval struct { 68 Bit uint 69 Sections []uint64 70 Bitsets [][]byte 71 72 Context context.Context 73 Error error 74 } 75 76 // Matcher is a pipelined system of schedulers and logic matchers which perform 77 // binary AND/OR operations on the bit-streams, creating a stream of potential 78 // blocks to inspect for data content. 79 type Matcher struct { 80 sectionSize uint64 // Size of the data batches to filter on 81 82 filters [][]bloomIndexes // Filter the system is matching for 83 schedulers map[uint]*scheduler // Retrieval schedulers for loading bloom bits 84 85 retrievers chan chan uint // Retriever processes waiting for bit allocations 86 counters chan chan uint // Retriever processes waiting for task count reports 87 retrievals chan chan *Retrieval // Retriever processes waiting for task allocations 88 deliveries chan *Retrieval // Retriever processes waiting for task response deliveries 89 90 running uint32 // Atomic flag whether a session is live or not 91 } 92 93 // NewMatcher creates a new pipeline for retrieving bloom bit streams and doing 94 // address and topic filtering on them. Setting a filter component to `nil` is 95 // allowed and will result in that filter rule being skipped (OR 0x11...1). 96 func NewMatcher(sectionSize uint64, filters [][][]byte) *Matcher { 97 // Create the matcher instance 98 m := &Matcher{ 99 sectionSize: sectionSize, 100 schedulers: make(map[uint]*scheduler), 101 retrievers: make(chan chan uint), 102 counters: make(chan chan uint), 103 retrievals: make(chan chan *Retrieval), 104 deliveries: make(chan *Retrieval), 105 } 106 // Calculate the bloom bit indexes for the groups we're interested in 107 m.filters = nil 108 109 for _, filter := range filters { 110 // Gather the bit indexes of the filter rule, special casing the nil filter 111 if len(filter) == 0 { 112 continue 113 } 114 bloomBits := make([]bloomIndexes, len(filter)) 115 for i, clause := range filter { 116 if clause == nil { 117 bloomBits = nil 118 break 119 } 120 bloomBits[i] = calcBloomIndexes(clause) 121 } 122 // Accumulate the filter rules if no nil rule was within 123 if bloomBits != nil { 124 m.filters = append(m.filters, bloomBits) 125 } 126 } 127 // For every bit, create a scheduler to load/download the bit vectors 128 for _, bloomIndexLists := range m.filters { 129 for _, bloomIndexList := range bloomIndexLists { 130 for _, bloomIndex := range bloomIndexList { 131 m.addScheduler(bloomIndex) 132 } 133 } 134 } 135 return m 136 } 137 138 // addScheduler adds a bit stream retrieval scheduler for the given bit index if 139 // it has not existed before. If the bit is already selected for filtering, the 140 // existing scheduler can be used. 141 func (m *Matcher) addScheduler(idx uint) { 142 if _, ok := m.schedulers[idx]; ok { 143 return 144 } 145 m.schedulers[idx] = newScheduler(idx) 146 } 147 148 // Start starts the matching process and returns a stream of bloom matches in 149 // a given range of blocks. If there are no more matches in the range, the result 150 // channel is closed. 151 func (m *Matcher) Start(ctx context.Context, begin, end uint64, results chan uint64) (*MatcherSession, error) { 152 // Make sure we're not creating concurrent sessions 153 if atomic.SwapUint32(&m.running, 1) == 1 { 154 return nil, errors.New("matcher already running") 155 } 156 defer atomic.StoreUint32(&m.running, 0) 157 158 // Initiate a new matching round 159 session := &MatcherSession{ 160 matcher: m, 161 quit: make(chan struct{}), 162 kill: make(chan struct{}), 163 ctx: ctx, 164 } 165 for _, scheduler := range m.schedulers { 166 scheduler.reset() 167 } 168 sink := m.run(begin, end, cap(results), session) 169 170 // Read the output from the result sink and deliver to the user 171 session.pend.Add(1) 172 go func() { 173 defer session.pend.Done() 174 defer close(results) 175 176 for { 177 select { 178 case <-session.quit: 179 return 180 181 case res, ok := <-sink: 182 // New match result found 183 if !ok { 184 return 185 } 186 // Calculate the first and last blocks of the section 187 sectionStart := res.section * m.sectionSize 188 189 first := sectionStart 190 if begin > first { 191 first = begin 192 } 193 last := sectionStart + m.sectionSize - 1 194 if end < last { 195 last = end 196 } 197 // Iterate over all the blocks in the section and return the matching ones 198 for i := first; i <= last; i++ { 199 // Skip the entire byte if no matches are found inside (and we're processing an entire byte!) 200 next := res.bitset[(i-sectionStart)/8] 201 if next == 0 { 202 if i%8 == 0 { 203 i += 7 204 } 205 continue 206 } 207 // Some bit it set, do the actual submatching 208 if bit := 7 - i%8; next&(1<<bit) != 0 { 209 select { 210 case <-session.quit: 211 return 212 case results <- i: 213 } 214 } 215 } 216 } 217 } 218 }() 219 return session, nil 220 } 221 222 // run creates a daisy-chain of sub-matchers, one for the address set and one 223 // for each topic set, each sub-matcher receiving a section only if the previous 224 // ones have all found a potential match in one of the blocks of the section, 225 // then binary AND-ing its own matches and forwarding the result to the next one. 226 // 227 // The method starts feeding the section indexes into the first sub-matcher on a 228 // new goroutine and returns a sink channel receiving the results. 229 func (m *Matcher) run(begin, end uint64, buffer int, session *MatcherSession) chan *partialMatches { 230 // Create the source channel and feed section indexes into 231 source := make(chan *partialMatches, buffer) 232 233 session.pend.Add(1) 234 go func() { 235 defer session.pend.Done() 236 defer close(source) 237 238 for i := begin / m.sectionSize; i <= end/m.sectionSize; i++ { 239 select { 240 case <-session.quit: 241 return 242 case source <- &partialMatches{i, bytes.Repeat([]byte{0xff}, int(m.sectionSize/8))}: 243 } 244 } 245 }() 246 // Assemble the daisy-chained filtering pipeline 247 next := source 248 dist := make(chan *request, buffer) 249 250 for _, bloom := range m.filters { 251 next = m.subMatch(next, dist, bloom, session) 252 } 253 // Start the request distribution 254 session.pend.Add(1) 255 go m.distributor(dist, session) 256 257 return next 258 } 259 260 // subMatch creates a sub-matcher that filters for a set of addresses or topics, binary OR-s those matches, then 261 // binary AND-s the result to the daisy-chain input (source) and forwards it to the daisy-chain output. 262 // The matches of each address/topic are calculated by fetching the given sections of the three bloom bit indexes belonging to 263 // that address/topic, and binary AND-ing those vectors together. 264 func (m *Matcher) subMatch(source chan *partialMatches, dist chan *request, bloom []bloomIndexes, session *MatcherSession) chan *partialMatches { 265 // Start the concurrent schedulers for each bit required by the bloom filter 266 sectionSources := make([][3]chan uint64, len(bloom)) 267 sectionSinks := make([][3]chan []byte, len(bloom)) 268 for i, bits := range bloom { 269 for j, bit := range bits { 270 sectionSources[i][j] = make(chan uint64, cap(source)) 271 sectionSinks[i][j] = make(chan []byte, cap(source)) 272 273 m.schedulers[bit].run(sectionSources[i][j], dist, sectionSinks[i][j], session.quit, &session.pend) 274 } 275 } 276 277 process := make(chan *partialMatches, cap(source)) // entries from source are forwarded here after fetches have been initiated 278 results := make(chan *partialMatches, cap(source)) 279 280 session.pend.Add(2) 281 go func() { 282 // Tear down the goroutine and terminate all source channels 283 defer session.pend.Done() 284 defer close(process) 285 286 defer func() { 287 for _, bloomSources := range sectionSources { 288 for _, bitSource := range bloomSources { 289 close(bitSource) 290 } 291 } 292 }() 293 // Read sections from the source channel and multiplex into all bit-schedulers 294 for { 295 select { 296 case <-session.quit: 297 return 298 299 case subres, ok := <-source: 300 // New subresult from previous link 301 if !ok { 302 return 303 } 304 // Multiplex the section index to all bit-schedulers 305 for _, bloomSources := range sectionSources { 306 for _, bitSource := range bloomSources { 307 select { 308 case <-session.quit: 309 return 310 case bitSource <- subres.section: 311 } 312 } 313 } 314 // Notify the processor that this section will become available 315 select { 316 case <-session.quit: 317 return 318 case process <- subres: 319 } 320 } 321 } 322 }() 323 324 go func() { 325 // Tear down the goroutine and terminate the final sink channel 326 defer session.pend.Done() 327 defer close(results) 328 329 // Read the source notifications and collect the delivered results 330 for { 331 select { 332 case <-session.quit: 333 return 334 335 case subres, ok := <-process: 336 // Notified of a section being retrieved 337 if !ok { 338 return 339 } 340 // Gather all the sub-results and merge them together 341 var orVector []byte 342 for _, bloomSinks := range sectionSinks { 343 var andVector []byte 344 for _, bitSink := range bloomSinks { 345 var data []byte 346 select { 347 case <-session.quit: 348 return 349 case data = <-bitSink: 350 } 351 if andVector == nil { 352 andVector = make([]byte, int(m.sectionSize/8)) 353 copy(andVector, data) 354 } else { 355 bitutil.ANDBytes(andVector, andVector, data) 356 } 357 } 358 if orVector == nil { 359 orVector = andVector 360 } else { 361 bitutil.ORBytes(orVector, orVector, andVector) 362 } 363 } 364 365 if orVector == nil { 366 orVector = make([]byte, int(m.sectionSize/8)) 367 } 368 if subres.bitset != nil { 369 bitutil.ANDBytes(orVector, orVector, subres.bitset) 370 } 371 if bitutil.TestBytes(orVector) { 372 select { 373 case <-session.quit: 374 return 375 case results <- &partialMatches{subres.section, orVector}: 376 } 377 } 378 } 379 } 380 }() 381 return results 382 } 383 384 // distributor receives requests from the schedulers and queues them into a set 385 // of pending requests, which are assigned to retrievers wanting to fulfil them. 386 func (m *Matcher) distributor(dist chan *request, session *MatcherSession) { 387 defer session.pend.Done() 388 389 var ( 390 requests = make(map[uint][]uint64) // Per-bit list of section requests, ordered by section number 391 unallocs = make(map[uint]struct{}) // Bits with pending requests but not allocated to any retriever 392 retrievers chan chan uint // Waiting retrievers (toggled to nil if unallocs is empty) 393 ) 394 var ( 395 allocs int // Number of active allocations to handle graceful shutdown requests 396 shutdown = session.quit // Shutdown request channel, will gracefully wait for pending requests 397 ) 398 399 // assign is a helper method fo try to assign a pending bit an actively 400 // listening servicer, or schedule it up for later when one arrives. 401 assign := func(bit uint) { 402 select { 403 case fetcher := <-m.retrievers: 404 allocs++ 405 fetcher <- bit 406 default: 407 // No retrievers active, start listening for new ones 408 retrievers = m.retrievers 409 unallocs[bit] = struct{}{} 410 } 411 } 412 413 for { 414 select { 415 case <-shutdown: 416 // Graceful shutdown requested, wait until all pending requests are honoured 417 if allocs == 0 { 418 return 419 } 420 shutdown = nil 421 422 case <-session.kill: 423 // Pending requests not honoured in time, hard terminate 424 return 425 426 case req := <-dist: 427 // New retrieval request arrived to be distributed to some fetcher process 428 queue := requests[req.bit] 429 index := sort.Search(len(queue), func(i int) bool { return queue[i] >= req.section }) 430 requests[req.bit] = append(queue[:index], append([]uint64{req.section}, queue[index:]...)...) 431 432 // If it's a new bit and we have waiting fetchers, allocate to them 433 if len(queue) == 0 { 434 assign(req.bit) 435 } 436 437 case fetcher := <-retrievers: 438 // New retriever arrived, find the lowest section-ed bit to assign 439 bit, best := uint(0), uint64(math.MaxUint64) 440 for idx := range unallocs { 441 if requests[idx][0] < best { 442 bit, best = idx, requests[idx][0] 443 } 444 } 445 // Stop tracking this bit (and alloc notifications if no more work is available) 446 delete(unallocs, bit) 447 if len(unallocs) == 0 { 448 retrievers = nil 449 } 450 allocs++ 451 fetcher <- bit 452 453 case fetcher := <-m.counters: 454 // New task count request arrives, return number of items 455 fetcher <- uint(len(requests[<-fetcher])) 456 457 case fetcher := <-m.retrievals: 458 // New fetcher waiting for tasks to retrieve, assign 459 task := <-fetcher 460 if want := len(task.Sections); want >= len(requests[task.Bit]) { 461 task.Sections = requests[task.Bit] 462 delete(requests, task.Bit) 463 } else { 464 task.Sections = append(task.Sections[:0], requests[task.Bit][:want]...) 465 requests[task.Bit] = append(requests[task.Bit][:0], requests[task.Bit][want:]...) 466 } 467 fetcher <- task 468 469 // If anything was left unallocated, try to assign to someone else 470 if len(requests[task.Bit]) > 0 { 471 assign(task.Bit) 472 } 473 474 case result := <-m.deliveries: 475 // New retrieval task response from fetcher, split out missing sections and 476 // deliver complete ones 477 var ( 478 sections = make([]uint64, 0, len(result.Sections)) 479 bitsets = make([][]byte, 0, len(result.Bitsets)) 480 missing = make([]uint64, 0, len(result.Sections)) 481 ) 482 for i, bitset := range result.Bitsets { 483 if len(bitset) == 0 { 484 missing = append(missing, result.Sections[i]) 485 continue 486 } 487 sections = append(sections, result.Sections[i]) 488 bitsets = append(bitsets, bitset) 489 } 490 m.schedulers[result.Bit].deliver(sections, bitsets) 491 allocs-- 492 493 // Reschedule missing sections and allocate bit if newly available 494 if len(missing) > 0 { 495 queue := requests[result.Bit] 496 for _, section := range missing { 497 index := sort.Search(len(queue), func(i int) bool { return queue[i] >= section }) 498 queue = append(queue[:index], append([]uint64{section}, queue[index:]...)...) 499 } 500 requests[result.Bit] = queue 501 502 if len(queue) == len(missing) { 503 assign(result.Bit) 504 } 505 } 506 // If we're in the process of shutting down, terminate 507 if allocs == 0 && shutdown == nil { 508 return 509 } 510 } 511 } 512 } 513 514 // MatcherSession is returned by a started matcher to be used as a terminator 515 // for the actively running matching operation. 516 type MatcherSession struct { 517 matcher *Matcher 518 519 closer sync.Once // Sync object to ensure we only ever close once 520 quit chan struct{} // Quit channel to request pipeline termination 521 kill chan struct{} // Term channel to signal non-graceful forced shutdown 522 523 ctx context.Context // Context used by the light client to abort filtering 524 err atomic.Value // Global error to track retrieval failures deep in the chain 525 526 pend sync.WaitGroup 527 } 528 529 // Close stops the matching process and waits for all subprocesses to terminate 530 // before returning. The timeout may be used for graceful shutdown, allowing the 531 // currently running retrievals to complete before this time. 532 func (s *MatcherSession) Close() { 533 s.closer.Do(func() { 534 // Signal termination and wait for all goroutines to tear down 535 close(s.quit) 536 time.AfterFunc(time.Second, func() { close(s.kill) }) 537 s.pend.Wait() 538 }) 539 } 540 541 // Error returns any failure encountered during the matching session. 542 func (s *MatcherSession) Error() error { 543 if err := s.err.Load(); err != nil { 544 return err.(error) 545 } 546 return nil 547 } 548 549 // AllocateRetrieval assigns a bloom bit index to a client process that can either 550 // immediately request and fetch the section contents assigned to this bit or wait 551 // a little while for more sections to be requested. 552 func (s *MatcherSession) AllocateRetrieval() (uint, bool) { 553 fetcher := make(chan uint) 554 555 select { 556 case <-s.quit: 557 return 0, false 558 case s.matcher.retrievers <- fetcher: 559 bit, ok := <-fetcher 560 return bit, ok 561 } 562 } 563 564 // PendingSections returns the number of pending section retrievals belonging to 565 // the given bloom bit index. 566 func (s *MatcherSession) PendingSections(bit uint) int { 567 fetcher := make(chan uint) 568 569 select { 570 case <-s.quit: 571 return 0 572 case s.matcher.counters <- fetcher: 573 fetcher <- bit 574 return int(<-fetcher) 575 } 576 } 577 578 // AllocateSections assigns all or part of an already allocated bit-task queue 579 // to the requesting process. 580 func (s *MatcherSession) AllocateSections(bit uint, count int) []uint64 { 581 fetcher := make(chan *Retrieval) 582 583 select { 584 case <-s.quit: 585 return nil 586 case s.matcher.retrievals <- fetcher: 587 task := &Retrieval{ 588 Bit: bit, 589 Sections: make([]uint64, count), 590 } 591 fetcher <- task 592 return (<-fetcher).Sections 593 } 594 } 595 596 // DeliverSections delivers a batch of section bit-vectors for a specific bloom 597 // bit index to be injected into the processing pipeline. 598 func (s *MatcherSession) DeliverSections(bit uint, sections []uint64, bitsets [][]byte) { 599 select { 600 case <-s.kill: 601 return 602 case s.matcher.deliveries <- &Retrieval{Bit: bit, Sections: sections, Bitsets: bitsets}: 603 } 604 } 605 606 // Multiplex polls the matcher session for retrieval tasks and multiplexes it into 607 // the requested retrieval queue to be serviced together with other sessions. 608 // 609 // This method will block for the lifetime of the session. Even after termination 610 // of the session, any request in-flight need to be responded to! Empty responses 611 // are fine though in that case. 612 func (s *MatcherSession) Multiplex(batch int, wait time.Duration, mux chan chan *Retrieval) { 613 for { 614 // Allocate a new bloom bit index to retrieve data for, stopping when done 615 bit, ok := s.AllocateRetrieval() 616 if !ok { 617 return 618 } 619 // Bit allocated, throttle a bit if we're below our batch limit 620 if s.PendingSections(bit) < batch { 621 select { 622 case <-s.quit: 623 // Session terminating, we can't meaningfully service, abort 624 s.AllocateSections(bit, 0) 625 s.DeliverSections(bit, []uint64{}, [][]byte{}) 626 return 627 628 case <-time.After(wait): 629 // Throttling up, fetch whatever's available 630 } 631 } 632 // Allocate as much as we can handle and request servicing 633 sections := s.AllocateSections(bit, batch) 634 request := make(chan *Retrieval) 635 636 select { 637 case <-s.quit: 638 // Session terminating, we can't meaningfully service, abort 639 s.DeliverSections(bit, sections, make([][]byte, len(sections))) 640 return 641 642 case mux <- request: 643 // Retrieval accepted, something must arrive before we're aborting 644 request <- &Retrieval{Bit: bit, Sections: sections, Context: s.ctx} 645 646 result := <-request 647 if result.Error != nil { 648 s.err.Store(result.Error) 649 s.Close() 650 } 651 s.DeliverSections(result.Bit, result.Sections, result.Bitsets) 652 } 653 } 654 }