github.com/waltonchain/waltonchain_gwtc_src@v1.1.4-0.20201225072101-8a298c95a819/core/bloombits/matcher.go (about) 1 // Copyright 2017 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-wtc library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-wtc library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package bloombits 18 19 import ( 20 "bytes" 21 "errors" 22 "math" 23 "sort" 24 "sync" 25 "sync/atomic" 26 "time" 27 28 "github.com/wtc/go-wtc/common/bitutil" 29 "github.com/wtc/go-wtc/crypto" 30 ) 31 32 // bloomIndexes represents the bit indexes inside the bloom filter that belong 33 // to some key. 34 type bloomIndexes [3]uint 35 36 // calcBloomIndexes returns the bloom filter bit indexes belonging to the given key. 37 func calcBloomIndexes(b []byte) bloomIndexes { 38 b = crypto.Keccak256(b) 39 40 var idxs bloomIndexes 41 for i := 0; i < len(idxs); i++ { 42 idxs[i] = (uint(b[2*i])<<8)&2047 + uint(b[2*i+1]) 43 } 44 return idxs 45 } 46 47 // partialMatches with a non-nil vector represents a section in which some sub- 48 // matchers have already found potential matches. Subsequent sub-matchers will 49 // binary AND their matches with this vector. If vector is nil, it represents a 50 // section to be processed by the first sub-matcher. 51 type partialMatches struct { 52 section uint64 53 bitset []byte 54 } 55 56 // Retrieval represents a request for retrieval task assignments for a given 57 // bit with the given number of fetch elements, or a response for such a request. 58 // It can also have the actual results set to be used as a delivery data struct. 59 type Retrieval struct { 60 Bit uint 61 Sections []uint64 62 Bitsets [][]byte 63 } 64 65 // Matcher is a pipelined system of schedulers and logic matchers which perform 66 // binary AND/OR operations on the bit-streams, creating a stream of potential 67 // blocks to inspect for data content. 68 type Matcher struct { 69 sectionSize uint64 // Size of the data batches to filter on 70 71 filters [][]bloomIndexes // Filter the system is matching for 72 schedulers map[uint]*scheduler // Retrieval schedulers for loading bloom bits 73 74 retrievers chan chan uint // Retriever processes waiting for bit allocations 75 counters chan chan uint // Retriever processes waiting for task count reports 76 retrievals chan chan *Retrieval // Retriever processes waiting for task allocations 77 deliveries chan *Retrieval // Retriever processes waiting for task response deliveries 78 79 running uint32 // Atomic flag whether a session is live or not 80 } 81 82 // NewMatcher creates a new pipeline for retrieving bloom bit streams and doing 83 // address and topic filtering on them. Setting a filter component to `nil` is 84 // allowed and will result in that filter rule being skipped (OR 0x11...1). 85 func NewMatcher(sectionSize uint64, filters [][][]byte) *Matcher { 86 // Create the matcher instance 87 m := &Matcher{ 88 sectionSize: sectionSize, 89 schedulers: make(map[uint]*scheduler), 90 retrievers: make(chan chan uint), 91 counters: make(chan chan uint), 92 retrievals: make(chan chan *Retrieval), 93 deliveries: make(chan *Retrieval), 94 } 95 // Calculate the bloom bit indexes for the groups we're interested in 96 m.filters = nil 97 98 for _, filter := range filters { 99 // Gather the bit indexes of the filter rule, special casing the nil filter 100 if len(filter) == 0 { 101 continue 102 } 103 bloomBits := make([]bloomIndexes, len(filter)) 104 for i, clause := range filter { 105 if clause == nil { 106 bloomBits = nil 107 break 108 } 109 bloomBits[i] = calcBloomIndexes(clause) 110 } 111 // Accumulate the filter rules if no nil rule was within 112 if bloomBits != nil { 113 m.filters = append(m.filters, bloomBits) 114 } 115 } 116 // For every bit, create a scheduler to load/download the bit vectors 117 for _, bloomIndexLists := range m.filters { 118 for _, bloomIndexList := range bloomIndexLists { 119 for _, bloomIndex := range bloomIndexList { 120 m.addScheduler(bloomIndex) 121 } 122 } 123 } 124 return m 125 } 126 127 // addScheduler adds a bit stream retrieval scheduler for the given bit index if 128 // it has not existed before. If the bit is already selected for filtering, the 129 // existing scheduler can be used. 130 func (m *Matcher) addScheduler(idx uint) { 131 if _, ok := m.schedulers[idx]; ok { 132 return 133 } 134 m.schedulers[idx] = newScheduler(idx) 135 } 136 137 // Start starts the matching process and returns a stream of bloom matches in 138 // a given range of blocks. If there are no more matches in the range, the result 139 // channel is closed. 140 func (m *Matcher) Start(begin, end uint64, results chan uint64) (*MatcherSession, error) { 141 // Make sure we're not creating concurrent sessions 142 if atomic.SwapUint32(&m.running, 1) == 1 { 143 return nil, errors.New("matcher already running") 144 } 145 defer atomic.StoreUint32(&m.running, 0) 146 147 // Initiate a new matching round 148 session := &MatcherSession{ 149 matcher: m, 150 quit: make(chan struct{}), 151 kill: make(chan struct{}), 152 } 153 for _, scheduler := range m.schedulers { 154 scheduler.reset() 155 } 156 sink := m.run(begin, end, cap(results), session) 157 158 // Read the output from the result sink and deliver to the user 159 session.pend.Add(1) 160 go func() { 161 defer session.pend.Done() 162 defer close(results) 163 164 for { 165 select { 166 case <-session.quit: 167 return 168 169 case res, ok := <-sink: 170 // New match result found 171 if !ok { 172 return 173 } 174 // Calculate the first and last blocks of the section 175 sectionStart := res.section * m.sectionSize 176 177 first := sectionStart 178 if begin > first { 179 first = begin 180 } 181 last := sectionStart + m.sectionSize - 1 182 if end < last { 183 last = end 184 } 185 // Iterate over all the blocks in the section and return the matching ones 186 for i := first; i <= last; i++ { 187 // Skip the entire byte if no matches are found inside 188 next := res.bitset[(i-sectionStart)/8] 189 if next == 0 { 190 i += 7 191 continue 192 } 193 // Some bit it set, do the actual submatching 194 if bit := 7 - i%8; next&(1<<bit) != 0 { 195 select { 196 case <-session.quit: 197 return 198 case results <- i: 199 } 200 } 201 } 202 } 203 } 204 }() 205 return session, nil 206 } 207 208 // run creates a daisy-chain of sub-matchers, one for the address set and one 209 // for each topic set, each sub-matcher receiving a section only if the previous 210 // ones have all found a potential match in one of the blocks of the section, 211 // then binary AND-ing its own matches and forwaring the result to the next one. 212 // 213 // The method starts feeding the section indexes into the first sub-matcher on a 214 // new goroutine and returns a sink channel receiving the results. 215 func (m *Matcher) run(begin, end uint64, buffer int, session *MatcherSession) chan *partialMatches { 216 // Create the source channel and feed section indexes into 217 source := make(chan *partialMatches, buffer) 218 219 session.pend.Add(1) 220 go func() { 221 defer session.pend.Done() 222 defer close(source) 223 224 for i := begin / m.sectionSize; i <= end/m.sectionSize; i++ { 225 select { 226 case <-session.quit: 227 return 228 case source <- &partialMatches{i, bytes.Repeat([]byte{0xff}, int(m.sectionSize/8))}: 229 } 230 } 231 }() 232 // Assemble the daisy-chained filtering pipeline 233 next := source 234 dist := make(chan *request, buffer) 235 236 for _, bloom := range m.filters { 237 next = m.subMatch(next, dist, bloom, session) 238 } 239 // Start the request distribution 240 session.pend.Add(1) 241 go m.distributor(dist, session) 242 243 return next 244 } 245 246 // subMatch creates a sub-matcher that filters for a set of addresses or topics, binary OR-s those matches, then 247 // binary AND-s the result to the daisy-chain input (source) and forwards it to the daisy-chain output. 248 // The matches of each address/topic are calculated by fetching the given sections of the three bloom bit indexes belonging to 249 // that address/topic, and binary AND-ing those vectors together. 250 func (m *Matcher) subMatch(source chan *partialMatches, dist chan *request, bloom []bloomIndexes, session *MatcherSession) chan *partialMatches { 251 // Start the concurrent schedulers for each bit required by the bloom filter 252 sectionSources := make([][3]chan uint64, len(bloom)) 253 sectionSinks := make([][3]chan []byte, len(bloom)) 254 for i, bits := range bloom { 255 for j, bit := range bits { 256 sectionSources[i][j] = make(chan uint64, cap(source)) 257 sectionSinks[i][j] = make(chan []byte, cap(source)) 258 259 m.schedulers[bit].run(sectionSources[i][j], dist, sectionSinks[i][j], session.quit, &session.pend) 260 } 261 } 262 263 process := make(chan *partialMatches, cap(source)) // entries from source are forwarded here after fetches have been initiated 264 results := make(chan *partialMatches, cap(source)) 265 266 session.pend.Add(2) 267 go func() { 268 // Tear down the goroutine and terminate all source channels 269 defer session.pend.Done() 270 defer close(process) 271 272 defer func() { 273 for _, bloomSources := range sectionSources { 274 for _, bitSource := range bloomSources { 275 close(bitSource) 276 } 277 } 278 }() 279 // Read sections from the source channel and multiplex into all bit-schedulers 280 for { 281 select { 282 case <-session.quit: 283 return 284 285 case subres, ok := <-source: 286 // New subresult from previous link 287 if !ok { 288 return 289 } 290 // Multiplex the section index to all bit-schedulers 291 for _, bloomSources := range sectionSources { 292 for _, bitSource := range bloomSources { 293 select { 294 case <-session.quit: 295 return 296 case bitSource <- subres.section: 297 } 298 } 299 } 300 // Notify the processor that this section will become available 301 select { 302 case <-session.quit: 303 return 304 case process <- subres: 305 } 306 } 307 } 308 }() 309 310 go func() { 311 // Tear down the goroutine and terminate the final sink channel 312 defer session.pend.Done() 313 defer close(results) 314 315 // Read the source notifications and collect the delivered results 316 for { 317 select { 318 case <-session.quit: 319 return 320 321 case subres, ok := <-process: 322 // Notified of a section being retrieved 323 if !ok { 324 return 325 } 326 // Gather all the sub-results and merge them together 327 var orVector []byte 328 for _, bloomSinks := range sectionSinks { 329 var andVector []byte 330 for _, bitSink := range bloomSinks { 331 var data []byte 332 select { 333 case <-session.quit: 334 return 335 case data = <-bitSink: 336 } 337 if andVector == nil { 338 andVector = make([]byte, int(m.sectionSize/8)) 339 copy(andVector, data) 340 } else { 341 bitutil.ANDBytes(andVector, andVector, data) 342 } 343 } 344 if orVector == nil { 345 orVector = andVector 346 } else { 347 bitutil.ORBytes(orVector, orVector, andVector) 348 } 349 } 350 351 if orVector == nil { 352 orVector = make([]byte, int(m.sectionSize/8)) 353 } 354 if subres.bitset != nil { 355 bitutil.ANDBytes(orVector, orVector, subres.bitset) 356 } 357 if bitutil.TestBytes(orVector) { 358 select { 359 case <-session.quit: 360 return 361 case results <- &partialMatches{subres.section, orVector}: 362 } 363 } 364 } 365 } 366 }() 367 return results 368 } 369 370 // distributor receives requests from the schedulers and queues them into a set 371 // of pending requests, which are assigned to retrievers wanting to fulfil them. 372 func (m *Matcher) distributor(dist chan *request, session *MatcherSession) { 373 defer session.pend.Done() 374 375 var ( 376 requests = make(map[uint][]uint64) // Per-bit list of section requests, ordered by section number 377 unallocs = make(map[uint]struct{}) // Bits with pending requests but not allocated to any retriever 378 retrievers chan chan uint // Waiting retrievers (toggled to nil if unallocs is empty) 379 ) 380 var ( 381 allocs int // Number of active allocations to handle graceful shutdown requests 382 shutdown = session.quit // Shutdown request channel, will gracefully wait for pending requests 383 ) 384 385 // assign is a helper method fo try to assign a pending bit an an actively 386 // listening servicer, or schedule it up for later when one arrives. 387 assign := func(bit uint) { 388 select { 389 case fetcher := <-m.retrievers: 390 allocs++ 391 fetcher <- bit 392 default: 393 // No retrievers active, start listening for new ones 394 retrievers = m.retrievers 395 unallocs[bit] = struct{}{} 396 } 397 } 398 399 for { 400 select { 401 case <-shutdown: 402 // Graceful shutdown requested, wait until all pending requests are honoured 403 if allocs == 0 { 404 return 405 } 406 shutdown = nil 407 408 case <-session.kill: 409 // Pending requests not honoured in time, hard terminate 410 return 411 412 case req := <-dist: 413 // New retrieval request arrived to be distributed to some fetcher process 414 queue := requests[req.bit] 415 index := sort.Search(len(queue), func(i int) bool { return queue[i] >= req.section }) 416 requests[req.bit] = append(queue[:index], append([]uint64{req.section}, queue[index:]...)...) 417 418 // If it's a new bit and we have waiting fetchers, allocate to them 419 if len(queue) == 0 { 420 assign(req.bit) 421 } 422 423 case fetcher := <-retrievers: 424 // New retriever arrived, find the lowest section-ed bit to assign 425 bit, best := uint(0), uint64(math.MaxUint64) 426 for idx := range unallocs { 427 if requests[idx][0] < best { 428 bit, best = idx, requests[idx][0] 429 } 430 } 431 // Stop tracking this bit (and alloc notifications if no more work is available) 432 delete(unallocs, bit) 433 if len(unallocs) == 0 { 434 retrievers = nil 435 } 436 allocs++ 437 fetcher <- bit 438 439 case fetcher := <-m.counters: 440 // New task count request arrives, return number of items 441 fetcher <- uint(len(requests[<-fetcher])) 442 443 case fetcher := <-m.retrievals: 444 // New fetcher waiting for tasks to retrieve, assign 445 task := <-fetcher 446 if want := len(task.Sections); want >= len(requests[task.Bit]) { 447 task.Sections = requests[task.Bit] 448 delete(requests, task.Bit) 449 } else { 450 task.Sections = append(task.Sections[:0], requests[task.Bit][:want]...) 451 requests[task.Bit] = append(requests[task.Bit][:0], requests[task.Bit][want:]...) 452 } 453 fetcher <- task 454 455 // If anything was left unallocated, try to assign to someone else 456 if len(requests[task.Bit]) > 0 { 457 assign(task.Bit) 458 } 459 460 case result := <-m.deliveries: 461 // New retrieval task response from fetcher, split out missing sections and 462 // deliver complete ones 463 var ( 464 sections = make([]uint64, 0, len(result.Sections)) 465 bitsets = make([][]byte, 0, len(result.Bitsets)) 466 missing = make([]uint64, 0, len(result.Sections)) 467 ) 468 for i, bitset := range result.Bitsets { 469 if len(bitset) == 0 { 470 missing = append(missing, result.Sections[i]) 471 continue 472 } 473 sections = append(sections, result.Sections[i]) 474 bitsets = append(bitsets, bitset) 475 } 476 m.schedulers[result.Bit].deliver(sections, bitsets) 477 allocs-- 478 479 // Reschedule missing sections and allocate bit if newly available 480 if len(missing) > 0 { 481 queue := requests[result.Bit] 482 for _, section := range missing { 483 index := sort.Search(len(queue), func(i int) bool { return queue[i] >= section }) 484 queue = append(queue[:index], append([]uint64{section}, queue[index:]...)...) 485 } 486 requests[result.Bit] = queue 487 488 if len(queue) == len(missing) { 489 assign(result.Bit) 490 } 491 } 492 // If we're in the process of shutting down, terminate 493 if allocs == 0 && shutdown == nil { 494 return 495 } 496 } 497 } 498 } 499 500 // MatcherSession is returned by a started matcher to be used as a terminator 501 // for the actively running matching operation. 502 type MatcherSession struct { 503 matcher *Matcher 504 505 quit chan struct{} // Quit channel to request pipeline termination 506 kill chan struct{} // Term channel to signal non-graceful forced shutdown 507 pend sync.WaitGroup 508 } 509 510 // Close stops the matching process and waits for all subprocesses to terminate 511 // before returning. The timeout may be used for graceful shutdown, allowing the 512 // currently running retrievals to complete before this time. 513 func (s *MatcherSession) Close(timeout time.Duration) { 514 // Bail out if the matcher is not running 515 select { 516 case <-s.quit: 517 return 518 default: 519 } 520 // Signal termination and wait for all goroutines to tear down 521 close(s.quit) 522 time.AfterFunc(timeout, func() { close(s.kill) }) 523 s.pend.Wait() 524 } 525 526 // AllocateRetrieval assigns a bloom bit index to a client process that can either 527 // immediately reuest and fetch the section contents assigned to this bit or wait 528 // a little while for more sections to be requested. 529 func (s *MatcherSession) AllocateRetrieval() (uint, bool) { 530 fetcher := make(chan uint) 531 532 select { 533 case <-s.quit: 534 return 0, false 535 case s.matcher.retrievers <- fetcher: 536 bit, ok := <-fetcher 537 return bit, ok 538 } 539 } 540 541 // PendingSections returns the number of pending section retrievals belonging to 542 // the given bloom bit index. 543 func (s *MatcherSession) PendingSections(bit uint) int { 544 fetcher := make(chan uint) 545 546 select { 547 case <-s.quit: 548 return 0 549 case s.matcher.counters <- fetcher: 550 fetcher <- bit 551 return int(<-fetcher) 552 } 553 } 554 555 // AllocateSections assigns all or part of an already allocated bit-task queue 556 // to the requesting process. 557 func (s *MatcherSession) AllocateSections(bit uint, count int) []uint64 { 558 fetcher := make(chan *Retrieval) 559 560 select { 561 case <-s.quit: 562 return nil 563 case s.matcher.retrievals <- fetcher: 564 task := &Retrieval{ 565 Bit: bit, 566 Sections: make([]uint64, count), 567 } 568 fetcher <- task 569 return (<-fetcher).Sections 570 } 571 } 572 573 // DeliverSections delivers a batch of section bit-vectors for a specific bloom 574 // bit index to be injected into the processing pipeline. 575 func (s *MatcherSession) DeliverSections(bit uint, sections []uint64, bitsets [][]byte) { 576 select { 577 case <-s.kill: 578 return 579 case s.matcher.deliveries <- &Retrieval{Bit: bit, Sections: sections, Bitsets: bitsets}: 580 } 581 } 582 583 // Multiplex polls the matcher session for rerieval tasks and multiplexes it into 584 // the reuested retrieval queue to be serviced together with other sessions. 585 // 586 // This method will block for the lifetime of the session. Even after termination 587 // of the session, any request in-flight need to be responded to! Empty responses 588 // are fine though in that case. 589 func (s *MatcherSession) Multiplex(batch int, wait time.Duration, mux chan chan *Retrieval) { 590 for { 591 // Allocate a new bloom bit index to retrieve data for, stopping when done 592 bit, ok := s.AllocateRetrieval() 593 if !ok { 594 return 595 } 596 // Bit allocated, throttle a bit if we're below our batch limit 597 if s.PendingSections(bit) < batch { 598 select { 599 case <-s.quit: 600 // Session terminating, we can't meaningfully service, abort 601 s.AllocateSections(bit, 0) 602 s.DeliverSections(bit, []uint64{}, [][]byte{}) 603 return 604 605 case <-time.After(wait): 606 // Throttling up, fetch whatever's available 607 } 608 } 609 // Allocate as much as we can handle and request servicing 610 sections := s.AllocateSections(bit, batch) 611 request := make(chan *Retrieval) 612 613 select { 614 case <-s.quit: 615 // Session terminating, we can't meaningfully service, abort 616 s.DeliverSections(bit, sections, make([][]byte, len(sections))) 617 return 618 619 case mux <- request: 620 // Retrieval accepted, something must arrive before we're aborting 621 request <- &Retrieval{Bit: bit, Sections: sections} 622 623 result := <-request 624 s.DeliverSections(result.Bit, result.Sections, result.Bitsets) 625 } 626 } 627 }