github.com/arieschain/arieschain@v0.0.0-20191023063405-37c074544356/core/bloombits/matcher.go (about) 1 package bloombits 2 3 import ( 4 "bytes" 5 "context" 6 "errors" 7 "math" 8 "sort" 9 "sync" 10 "sync/atomic" 11 "time" 12 13 "github.com/quickchainproject/quickchain/common/bitutil" 14 "github.com/quickchainproject/quickchain/crypto" 15 ) 16 17 // bloomIndexes represents the bit indexes inside the bloom filter that belong 18 // to some key. 19 type bloomIndexes [3]uint 20 21 // calcBloomIndexes returns the bloom filter bit indexes belonging to the given key. 22 func calcBloomIndexes(b []byte) bloomIndexes { 23 b = crypto.Keccak256(b) 24 25 var idxs bloomIndexes 26 for i := 0; i < len(idxs); i++ { 27 idxs[i] = (uint(b[2*i])<<8)&2047 + uint(b[2*i+1]) 28 } 29 return idxs 30 } 31 32 // partialMatches with a non-nil vector represents a section in which some sub- 33 // matchers have already found potential matches. Subsequent sub-matchers will 34 // binary AND their matches with this vector. If vector is nil, it represents a 35 // section to be processed by the first sub-matcher. 36 type partialMatches struct { 37 section uint64 38 bitset []byte 39 } 40 41 // Retrieval represents a request for retrieval task assignments for a given 42 // bit with the given number of fetch elements, or a response for such a request. 43 // It can also have the actual results set to be used as a delivery data struct. 44 // 45 // The contest and error fields are used by the light client to terminate matching 46 // early if an error is enountered on some path of the pipeline. 47 type Retrieval struct { 48 Bit uint 49 Sections []uint64 50 Bitsets [][]byte 51 52 Context context.Context 53 Error error 54 } 55 56 // Matcher is a pipelined system of schedulers and logic matchers which perform 57 // binary AND/OR operations on the bit-streams, creating a stream of potential 58 // blocks to inspect for data content. 59 type Matcher struct { 60 sectionSize uint64 // Size of the data batches to filter on 61 62 filters [][]bloomIndexes // Filter the system is matching for 63 schedulers map[uint]*scheduler // Retrieval schedulers for loading bloom bits 64 65 retrievers chan chan uint // Retriever processes waiting for bit allocations 66 counters chan chan uint // Retriever processes waiting for task count reports 67 retrievals chan chan *Retrieval // Retriever processes waiting for task allocations 68 deliveries chan *Retrieval // Retriever processes waiting for task response deliveries 69 70 running uint32 // Atomic flag whether a session is live or not 71 } 72 73 // NewMatcher creates a new pipeline for retrieving bloom bit streams and doing 74 // address and topic filtering on them. Setting a filter component to `nil` is 75 // allowed and will result in that filter rule being skipped (OR 0x11...1). 76 func NewMatcher(sectionSize uint64, filters [][][]byte) *Matcher { 77 // Create the matcher instance 78 m := &Matcher{ 79 sectionSize: sectionSize, 80 schedulers: make(map[uint]*scheduler), 81 retrievers: make(chan chan uint), 82 counters: make(chan chan uint), 83 retrievals: make(chan chan *Retrieval), 84 deliveries: make(chan *Retrieval), 85 } 86 // Calculate the bloom bit indexes for the groups we're interested in 87 m.filters = nil 88 89 for _, filter := range filters { 90 // Gather the bit indexes of the filter rule, special casing the nil filter 91 if len(filter) == 0 { 92 continue 93 } 94 bloomBits := make([]bloomIndexes, len(filter)) 95 for i, clause := range filter { 96 if clause == nil { 97 bloomBits = nil 98 break 99 } 100 bloomBits[i] = calcBloomIndexes(clause) 101 } 102 // Accumulate the filter rules if no nil rule was within 103 if bloomBits != nil { 104 m.filters = append(m.filters, bloomBits) 105 } 106 } 107 // For every bit, create a scheduler to load/download the bit vectors 108 for _, bloomIndexLists := range m.filters { 109 for _, bloomIndexList := range bloomIndexLists { 110 for _, bloomIndex := range bloomIndexList { 111 m.addScheduler(bloomIndex) 112 } 113 } 114 } 115 return m 116 } 117 118 // addScheduler adds a bit stream retrieval scheduler for the given bit index if 119 // it has not existed before. If the bit is already selected for filtering, the 120 // existing scheduler can be used. 121 func (m *Matcher) addScheduler(idx uint) { 122 if _, ok := m.schedulers[idx]; ok { 123 return 124 } 125 m.schedulers[idx] = newScheduler(idx) 126 } 127 128 // Start starts the matching process and returns a stream of bloom matches in 129 // a given range of blocks. If there are no more matches in the range, the result 130 // channel is closed. 131 func (m *Matcher) Start(ctx context.Context, begin, end uint64, results chan uint64) (*MatcherSession, error) { 132 // Make sure we're not creating concurrent sessions 133 if atomic.SwapUint32(&m.running, 1) == 1 { 134 return nil, errors.New("matcher already running") 135 } 136 defer atomic.StoreUint32(&m.running, 0) 137 138 // Initiate a new matching round 139 session := &MatcherSession{ 140 matcher: m, 141 quit: make(chan struct{}), 142 kill: make(chan struct{}), 143 ctx: ctx, 144 } 145 for _, scheduler := range m.schedulers { 146 scheduler.reset() 147 } 148 sink := m.run(begin, end, cap(results), session) 149 150 // Read the output from the result sink and deliver to the user 151 session.pend.Add(1) 152 go func() { 153 defer session.pend.Done() 154 defer close(results) 155 156 for { 157 select { 158 case <-session.quit: 159 return 160 161 case res, ok := <-sink: 162 // New match result found 163 if !ok { 164 return 165 } 166 // Calculate the first and last blocks of the section 167 sectionStart := res.section * m.sectionSize 168 169 first := sectionStart 170 if begin > first { 171 first = begin 172 } 173 last := sectionStart + m.sectionSize - 1 174 if end < last { 175 last = end 176 } 177 // Iterate over all the blocks in the section and return the matching ones 178 for i := first; i <= last; i++ { 179 // Skip the entire byte if no matches are found inside (and we're processing an entire byte!) 180 next := res.bitset[(i-sectionStart)/8] 181 if next == 0 { 182 if i%8 == 0 { 183 i += 7 184 } 185 continue 186 } 187 // Some bit it set, do the actual submatching 188 if bit := 7 - i%8; next&(1<<bit) != 0 { 189 select { 190 case <-session.quit: 191 return 192 case results <- i: 193 } 194 } 195 } 196 } 197 } 198 }() 199 return session, nil 200 } 201 202 // run creates a daisy-chain of sub-matchers, one for the address set and one 203 // for each topic set, each sub-matcher receiving a section only if the previous 204 // ones have all found a potential match in one of the blocks of the section, 205 // then binary AND-ing its own matches and forwaring the result to the next one. 206 // 207 // The method starts feeding the section indexes into the first sub-matcher on a 208 // new goroutine and returns a sink channel receiving the results. 209 func (m *Matcher) run(begin, end uint64, buffer int, session *MatcherSession) chan *partialMatches { 210 // Create the source channel and feed section indexes into 211 source := make(chan *partialMatches, buffer) 212 213 session.pend.Add(1) 214 go func() { 215 defer session.pend.Done() 216 defer close(source) 217 218 for i := begin / m.sectionSize; i <= end/m.sectionSize; i++ { 219 select { 220 case <-session.quit: 221 return 222 case source <- &partialMatches{i, bytes.Repeat([]byte{0xff}, int(m.sectionSize/8))}: 223 } 224 } 225 }() 226 // Assemble the daisy-chained filtering pipeline 227 next := source 228 dist := make(chan *request, buffer) 229 230 for _, bloom := range m.filters { 231 next = m.subMatch(next, dist, bloom, session) 232 } 233 // Start the request distribution 234 session.pend.Add(1) 235 go m.distributor(dist, session) 236 237 return next 238 } 239 240 // subMatch creates a sub-matcher that filters for a set of addresses or topics, binary OR-s those matches, then 241 // binary AND-s the result to the daisy-chain input (source) and forwards it to the daisy-chain output. 242 // The matches of each address/topic are calculated by fetching the given sections of the three bloom bit indexes belonging to 243 // that address/topic, and binary AND-ing those vectors together. 244 func (m *Matcher) subMatch(source chan *partialMatches, dist chan *request, bloom []bloomIndexes, session *MatcherSession) chan *partialMatches { 245 // Start the concurrent schedulers for each bit required by the bloom filter 246 sectionSources := make([][3]chan uint64, len(bloom)) 247 sectionSinks := make([][3]chan []byte, len(bloom)) 248 for i, bits := range bloom { 249 for j, bit := range bits { 250 sectionSources[i][j] = make(chan uint64, cap(source)) 251 sectionSinks[i][j] = make(chan []byte, cap(source)) 252 253 m.schedulers[bit].run(sectionSources[i][j], dist, sectionSinks[i][j], session.quit, &session.pend) 254 } 255 } 256 257 process := make(chan *partialMatches, cap(source)) // entries from source are forwarded here after fetches have been initiated 258 results := make(chan *partialMatches, cap(source)) 259 260 session.pend.Add(2) 261 go func() { 262 // Tear down the goroutine and terminate all source channels 263 defer session.pend.Done() 264 defer close(process) 265 266 defer func() { 267 for _, bloomSources := range sectionSources { 268 for _, bitSource := range bloomSources { 269 close(bitSource) 270 } 271 } 272 }() 273 // Read sections from the source channel and multiplex into all bit-schedulers 274 for { 275 select { 276 case <-session.quit: 277 return 278 279 case subres, ok := <-source: 280 // New subresult from previous link 281 if !ok { 282 return 283 } 284 // Multiplex the section index to all bit-schedulers 285 for _, bloomSources := range sectionSources { 286 for _, bitSource := range bloomSources { 287 select { 288 case <-session.quit: 289 return 290 case bitSource <- subres.section: 291 } 292 } 293 } 294 // Notify the processor that this section will become available 295 select { 296 case <-session.quit: 297 return 298 case process <- subres: 299 } 300 } 301 } 302 }() 303 304 go func() { 305 // Tear down the goroutine and terminate the final sink channel 306 defer session.pend.Done() 307 defer close(results) 308 309 // Read the source notifications and collect the delivered results 310 for { 311 select { 312 case <-session.quit: 313 return 314 315 case subres, ok := <-process: 316 // Notified of a section being retrieved 317 if !ok { 318 return 319 } 320 // Gather all the sub-results and merge them together 321 var orVector []byte 322 for _, bloomSinks := range sectionSinks { 323 var andVector []byte 324 for _, bitSink := range bloomSinks { 325 var data []byte 326 select { 327 case <-session.quit: 328 return 329 case data = <-bitSink: 330 } 331 if andVector == nil { 332 andVector = make([]byte, int(m.sectionSize/8)) 333 copy(andVector, data) 334 } else { 335 bitutil.ANDBytes(andVector, andVector, data) 336 } 337 } 338 if orVector == nil { 339 orVector = andVector 340 } else { 341 bitutil.ORBytes(orVector, orVector, andVector) 342 } 343 } 344 345 if orVector == nil { 346 orVector = make([]byte, int(m.sectionSize/8)) 347 } 348 if subres.bitset != nil { 349 bitutil.ANDBytes(orVector, orVector, subres.bitset) 350 } 351 if bitutil.TestBytes(orVector) { 352 select { 353 case <-session.quit: 354 return 355 case results <- &partialMatches{subres.section, orVector}: 356 } 357 } 358 } 359 } 360 }() 361 return results 362 } 363 364 // distributor receives requests from the schedulers and queues them into a set 365 // of pending requests, which are assigned to retrievers wanting to fulfil them. 366 func (m *Matcher) distributor(dist chan *request, session *MatcherSession) { 367 defer session.pend.Done() 368 369 var ( 370 requests = make(map[uint][]uint64) // Per-bit list of section requests, ordered by section number 371 unallocs = make(map[uint]struct{}) // Bits with pending requests but not allocated to any retriever 372 retrievers chan chan uint // Waiting retrievers (toggled to nil if unallocs is empty) 373 ) 374 var ( 375 allocs int // Number of active allocations to handle graceful shutdown requests 376 shutdown = session.quit // Shutdown request channel, will gracefully wait for pending requests 377 ) 378 379 // assign is a helper method fo try to assign a pending bit an actively 380 // listening servicer, or schedule it up for later when one arrives. 381 assign := func(bit uint) { 382 select { 383 case fetcher := <-m.retrievers: 384 allocs++ 385 fetcher <- bit 386 default: 387 // No retrievers active, start listening for new ones 388 retrievers = m.retrievers 389 unallocs[bit] = struct{}{} 390 } 391 } 392 393 for { 394 select { 395 case <-shutdown: 396 // Graceful shutdown requested, wait until all pending requests are honoured 397 if allocs == 0 { 398 return 399 } 400 shutdown = nil 401 402 case <-session.kill: 403 // Pending requests not honoured in time, hard terminate 404 return 405 406 case req := <-dist: 407 // New retrieval request arrived to be distributed to some fetcher process 408 queue := requests[req.bit] 409 index := sort.Search(len(queue), func(i int) bool { return queue[i] >= req.section }) 410 requests[req.bit] = append(queue[:index], append([]uint64{req.section}, queue[index:]...)...) 411 412 // If it's a new bit and we have waiting fetchers, allocate to them 413 if len(queue) == 0 { 414 assign(req.bit) 415 } 416 417 case fetcher := <-retrievers: 418 // New retriever arrived, find the lowest section-ed bit to assign 419 bit, best := uint(0), uint64(math.MaxUint64) 420 for idx := range unallocs { 421 if requests[idx][0] < best { 422 bit, best = idx, requests[idx][0] 423 } 424 } 425 // Stop tracking this bit (and alloc notifications if no more work is available) 426 delete(unallocs, bit) 427 if len(unallocs) == 0 { 428 retrievers = nil 429 } 430 allocs++ 431 fetcher <- bit 432 433 case fetcher := <-m.counters: 434 // New task count request arrives, return number of items 435 fetcher <- uint(len(requests[<-fetcher])) 436 437 case fetcher := <-m.retrievals: 438 // New fetcher waiting for tasks to retrieve, assign 439 task := <-fetcher 440 if want := len(task.Sections); want >= len(requests[task.Bit]) { 441 task.Sections = requests[task.Bit] 442 delete(requests, task.Bit) 443 } else { 444 task.Sections = append(task.Sections[:0], requests[task.Bit][:want]...) 445 requests[task.Bit] = append(requests[task.Bit][:0], requests[task.Bit][want:]...) 446 } 447 fetcher <- task 448 449 // If anything was left unallocated, try to assign to someone else 450 if len(requests[task.Bit]) > 0 { 451 assign(task.Bit) 452 } 453 454 case result := <-m.deliveries: 455 // New retrieval task response from fetcher, split out missing sections and 456 // deliver complete ones 457 var ( 458 sections = make([]uint64, 0, len(result.Sections)) 459 bitsets = make([][]byte, 0, len(result.Bitsets)) 460 missing = make([]uint64, 0, len(result.Sections)) 461 ) 462 for i, bitset := range result.Bitsets { 463 if len(bitset) == 0 { 464 missing = append(missing, result.Sections[i]) 465 continue 466 } 467 sections = append(sections, result.Sections[i]) 468 bitsets = append(bitsets, bitset) 469 } 470 m.schedulers[result.Bit].deliver(sections, bitsets) 471 allocs-- 472 473 // Reschedule missing sections and allocate bit if newly available 474 if len(missing) > 0 { 475 queue := requests[result.Bit] 476 for _, section := range missing { 477 index := sort.Search(len(queue), func(i int) bool { return queue[i] >= section }) 478 queue = append(queue[:index], append([]uint64{section}, queue[index:]...)...) 479 } 480 requests[result.Bit] = queue 481 482 if len(queue) == len(missing) { 483 assign(result.Bit) 484 } 485 } 486 // If we're in the process of shutting down, terminate 487 if allocs == 0 && shutdown == nil { 488 return 489 } 490 } 491 } 492 } 493 494 // MatcherSession is returned by a started matcher to be used as a terminator 495 // for the actively running matching operation. 496 type MatcherSession struct { 497 matcher *Matcher 498 499 closer sync.Once // Sync object to ensure we only ever close once 500 quit chan struct{} // Quit channel to request pipeline termination 501 kill chan struct{} // Term channel to signal non-graceful forced shutdown 502 503 ctx context.Context // Context used by the light client to abort filtering 504 err atomic.Value // Global error to track retrieval failures deep in the chain 505 506 pend sync.WaitGroup 507 } 508 509 // Close stops the matching process and waits for all subprocesses to terminate 510 // before returning. The timeout may be used for graceful shutdown, allowing the 511 // currently running retrievals to complete before this time. 512 func (s *MatcherSession) Close() { 513 s.closer.Do(func() { 514 // Signal termination and wait for all goroutines to tear down 515 close(s.quit) 516 time.AfterFunc(time.Second, func() { close(s.kill) }) 517 s.pend.Wait() 518 }) 519 } 520 521 // Error returns any failure encountered during the matching session. 522 func (s *MatcherSession) Error() error { 523 if err := s.err.Load(); err != nil { 524 return err.(error) 525 } 526 return nil 527 } 528 529 // AllocateRetrieval assigns a bloom bit index to a client process that can either 530 // immediately reuest and fetch the section contents assigned to this bit or wait 531 // a little while for more sections to be requested. 532 func (s *MatcherSession) AllocateRetrieval() (uint, bool) { 533 fetcher := make(chan uint) 534 535 select { 536 case <-s.quit: 537 return 0, false 538 case s.matcher.retrievers <- fetcher: 539 bit, ok := <-fetcher 540 return bit, ok 541 } 542 } 543 544 // PendingSections returns the number of pending section retrievals belonging to 545 // the given bloom bit index. 546 func (s *MatcherSession) PendingSections(bit uint) int { 547 fetcher := make(chan uint) 548 549 select { 550 case <-s.quit: 551 return 0 552 case s.matcher.counters <- fetcher: 553 fetcher <- bit 554 return int(<-fetcher) 555 } 556 } 557 558 // AllocateSections assigns all or part of an already allocated bit-task queue 559 // to the requesting process. 560 func (s *MatcherSession) AllocateSections(bit uint, count int) []uint64 { 561 fetcher := make(chan *Retrieval) 562 563 select { 564 case <-s.quit: 565 return nil 566 case s.matcher.retrievals <- fetcher: 567 task := &Retrieval{ 568 Bit: bit, 569 Sections: make([]uint64, count), 570 } 571 fetcher <- task 572 return (<-fetcher).Sections 573 } 574 } 575 576 // DeliverSections delivers a batch of section bit-vectors for a specific bloom 577 // bit index to be injected into the processing pipeline. 578 func (s *MatcherSession) DeliverSections(bit uint, sections []uint64, bitsets [][]byte) { 579 select { 580 case <-s.kill: 581 return 582 case s.matcher.deliveries <- &Retrieval{Bit: bit, Sections: sections, Bitsets: bitsets}: 583 } 584 } 585 586 // Multiplex polls the matcher session for rerieval tasks and multiplexes it into 587 // the reuested retrieval queue to be serviced together with other sessions. 588 // 589 // This method will block for the lifetime of the session. Even after termination 590 // of the session, any request in-flight need to be responded to! Empty responses 591 // are fine though in that case. 592 func (s *MatcherSession) Multiplex(batch int, wait time.Duration, mux chan chan *Retrieval) { 593 for { 594 // Allocate a new bloom bit index to retrieve data for, stopping when done 595 bit, ok := s.AllocateRetrieval() 596 if !ok { 597 return 598 } 599 // Bit allocated, throttle a bit if we're below our batch limit 600 if s.PendingSections(bit) < batch { 601 select { 602 case <-s.quit: 603 // Session terminating, we can't meaningfully service, abort 604 s.AllocateSections(bit, 0) 605 s.DeliverSections(bit, []uint64{}, [][]byte{}) 606 return 607 608 case <-time.After(wait): 609 // Throttling up, fetch whatever's available 610 } 611 } 612 // Allocate as much as we can handle and request servicing 613 sections := s.AllocateSections(bit, batch) 614 request := make(chan *Retrieval) 615 616 select { 617 case <-s.quit: 618 // Session terminating, we can't meaningfully service, abort 619 s.DeliverSections(bit, sections, make([][]byte, len(sections))) 620 return 621 622 case mux <- request: 623 // Retrieval accepted, something must arrive before we're aborting 624 request <- &Retrieval{Bit: bit, Sections: sections, Context: s.ctx} 625 626 result := <-request 627 if result.Error != nil { 628 s.err.Store(result.Error) 629 s.Close() 630 } 631 s.DeliverSections(result.Bit, result.Sections, result.Bitsets) 632 } 633 } 634 }