github.com/aigarnetwork/aigar@v0.0.0-20191115204914-d59a6eb70f8e/core/bloombits/matcher.go (about) 1 // Copyright 2018 The go-ethereum Authors 2 // Copyright 2019 The go-aigar Authors 3 // This file is part of the go-aigar library. 4 // 5 // The go-aigar library is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Lesser General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // The go-aigar library is distributed in the hope that it will be useful, 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Lesser General Public License for more details. 14 // 15 // You should have received a copy of the GNU Lesser General Public License 16 // along with the go-aigar library. If not, see <http://www.gnu.org/licenses/>. 17 18 package bloombits 19 20 import ( 21 "bytes" 22 "context" 23 "errors" 24 "math" 25 "sort" 26 "sync" 27 "sync/atomic" 28 "time" 29 30 "github.com/AigarNetwork/aigar/common/bitutil" 31 "github.com/AigarNetwork/aigar/crypto" 32 ) 33 34 // bloomIndexes represents the bit indexes inside the bloom filter that belong 35 // to some key. 36 type bloomIndexes [3]uint 37 38 // calcBloomIndexes returns the bloom filter bit indexes belonging to the given key. 39 func calcBloomIndexes(b []byte) bloomIndexes { 40 b = crypto.Keccak256(b) 41 42 var idxs bloomIndexes 43 for i := 0; i < len(idxs); i++ { 44 idxs[i] = (uint(b[2*i])<<8)&2047 + uint(b[2*i+1]) 45 } 46 return idxs 47 } 48 49 // partialMatches with a non-nil vector represents a section in which some sub- 50 // matchers have already found potential matches. Subsequent sub-matchers will 51 // binary AND their matches with this vector. If vector is nil, it represents a 52 // section to be processed by the first sub-matcher. 
type partialMatches struct {
	section uint64 // Index of the section the matches belong to
	bitset  []byte // One bit per block of the section, most significant bit first within each byte (see Start)
}

// Retrieval represents a request for retrieval task assignments for a given
// bit with the given number of fetch elements, or a response for such a request.
// It can also have the actual results set to be used as a delivery data struct.
//
// The context and error fields are used by the light client to terminate matching
// early if an error is encountered on some path of the pipeline.
type Retrieval struct {
	Bit      uint     // Bloom bit index the sections are requested for
	Sections []uint64 // Section indexes to retrieve (or that were retrieved)
	Bitsets  [][]byte // Retrieved bit vectors, parallel to Sections; an empty entry is treated as missing on delivery

	Context context.Context // Set from the session context when Multiplex hands the request out
	Error   error           // Non-nil if the retrieval failed; Multiplex records it and closes the session
}

// Matcher is a pipelined system of schedulers and logic matchers which perform
// binary AND/OR operations on the bit-streams, creating a stream of potential
// blocks to inspect for data content.
type Matcher struct {
	sectionSize uint64 // Size of the data batches to filter on

	filters    [][]bloomIndexes    // Filter the system is matching for
	schedulers map[uint]*scheduler // Retrieval schedulers for loading bloom bits

	retrievers chan chan uint       // Retriever processes waiting for bit allocations
	counters   chan chan uint       // Retriever processes waiting for task count reports
	retrievals chan chan *Retrieval // Retriever processes waiting for task allocations
	deliveries chan *Retrieval      // Retriever processes waiting for task response deliveries

	running uint32 // Atomic flag whether a session is live or not
}

// NewMatcher creates a new pipeline for retrieving bloom bit streams and doing
// address and topic filtering on them. Setting a filter component to `nil` is
// allowed and will result in that filter rule being skipped (OR 0x11...1).
93 func NewMatcher(sectionSize uint64, filters [][][]byte) *Matcher { 94 // Create the matcher instance 95 m := &Matcher{ 96 sectionSize: sectionSize, 97 schedulers: make(map[uint]*scheduler), 98 retrievers: make(chan chan uint), 99 counters: make(chan chan uint), 100 retrievals: make(chan chan *Retrieval), 101 deliveries: make(chan *Retrieval), 102 } 103 // Calculate the bloom bit indexes for the groups we're interested in 104 m.filters = nil 105 106 for _, filter := range filters { 107 // Gather the bit indexes of the filter rule, special casing the nil filter 108 if len(filter) == 0 { 109 continue 110 } 111 bloomBits := make([]bloomIndexes, len(filter)) 112 for i, clause := range filter { 113 if clause == nil { 114 bloomBits = nil 115 break 116 } 117 bloomBits[i] = calcBloomIndexes(clause) 118 } 119 // Accumulate the filter rules if no nil rule was within 120 if bloomBits != nil { 121 m.filters = append(m.filters, bloomBits) 122 } 123 } 124 // For every bit, create a scheduler to load/download the bit vectors 125 for _, bloomIndexLists := range m.filters { 126 for _, bloomIndexList := range bloomIndexLists { 127 for _, bloomIndex := range bloomIndexList { 128 m.addScheduler(bloomIndex) 129 } 130 } 131 } 132 return m 133 } 134 135 // addScheduler adds a bit stream retrieval scheduler for the given bit index if 136 // it has not existed before. If the bit is already selected for filtering, the 137 // existing scheduler can be used. 138 func (m *Matcher) addScheduler(idx uint) { 139 if _, ok := m.schedulers[idx]; ok { 140 return 141 } 142 m.schedulers[idx] = newScheduler(idx) 143 } 144 145 // Start starts the matching process and returns a stream of bloom matches in 146 // a given range of blocks. If there are no more matches in the range, the result 147 // channel is closed. 
148 func (m *Matcher) Start(ctx context.Context, begin, end uint64, results chan uint64) (*MatcherSession, error) { 149 // Make sure we're not creating concurrent sessions 150 if atomic.SwapUint32(&m.running, 1) == 1 { 151 return nil, errors.New("matcher already running") 152 } 153 defer atomic.StoreUint32(&m.running, 0) 154 155 // Initiate a new matching round 156 session := &MatcherSession{ 157 matcher: m, 158 quit: make(chan struct{}), 159 kill: make(chan struct{}), 160 ctx: ctx, 161 } 162 for _, scheduler := range m.schedulers { 163 scheduler.reset() 164 } 165 sink := m.run(begin, end, cap(results), session) 166 167 // Read the output from the result sink and deliver to the user 168 session.pend.Add(1) 169 go func() { 170 defer session.pend.Done() 171 defer close(results) 172 173 for { 174 select { 175 case <-session.quit: 176 return 177 178 case res, ok := <-sink: 179 // New match result found 180 if !ok { 181 return 182 } 183 // Calculate the first and last blocks of the section 184 sectionStart := res.section * m.sectionSize 185 186 first := sectionStart 187 if begin > first { 188 first = begin 189 } 190 last := sectionStart + m.sectionSize - 1 191 if end < last { 192 last = end 193 } 194 // Iterate over all the blocks in the section and return the matching ones 195 for i := first; i <= last; i++ { 196 // Skip the entire byte if no matches are found inside (and we're processing an entire byte!) 
197 next := res.bitset[(i-sectionStart)/8] 198 if next == 0 { 199 if i%8 == 0 { 200 i += 7 201 } 202 continue 203 } 204 // Some bit it set, do the actual submatching 205 if bit := 7 - i%8; next&(1<<bit) != 0 { 206 select { 207 case <-session.quit: 208 return 209 case results <- i: 210 } 211 } 212 } 213 } 214 } 215 }() 216 return session, nil 217 } 218 219 // run creates a daisy-chain of sub-matchers, one for the address set and one 220 // for each topic set, each sub-matcher receiving a section only if the previous 221 // ones have all found a potential match in one of the blocks of the section, 222 // then binary AND-ing its own matches and forwarding the result to the next one. 223 // 224 // The method starts feeding the section indexes into the first sub-matcher on a 225 // new goroutine and returns a sink channel receiving the results. 226 func (m *Matcher) run(begin, end uint64, buffer int, session *MatcherSession) chan *partialMatches { 227 // Create the source channel and feed section indexes into 228 source := make(chan *partialMatches, buffer) 229 230 session.pend.Add(1) 231 go func() { 232 defer session.pend.Done() 233 defer close(source) 234 235 for i := begin / m.sectionSize; i <= end/m.sectionSize; i++ { 236 select { 237 case <-session.quit: 238 return 239 case source <- &partialMatches{i, bytes.Repeat([]byte{0xff}, int(m.sectionSize/8))}: 240 } 241 } 242 }() 243 // Assemble the daisy-chained filtering pipeline 244 next := source 245 dist := make(chan *request, buffer) 246 247 for _, bloom := range m.filters { 248 next = m.subMatch(next, dist, bloom, session) 249 } 250 // Start the request distribution 251 session.pend.Add(1) 252 go m.distributor(dist, session) 253 254 return next 255 } 256 257 // subMatch creates a sub-matcher that filters for a set of addresses or topics, binary OR-s those matches, then 258 // binary AND-s the result to the daisy-chain input (source) and forwards it to the daisy-chain output. 
// The matches of each address/topic are calculated by fetching the given sections of the three bloom bit indexes belonging to
// that address/topic, and binary AND-ing those vectors together.
//
// The returned channel carries only those sections whose merged bitset passes
// bitutil.TestBytes; it is closed when the source is exhausted or the session
// quits.
func (m *Matcher) subMatch(source chan *partialMatches, dist chan *request, bloom []bloomIndexes, session *MatcherSession) chan *partialMatches {
	// Start the concurrent schedulers for each bit required by the bloom filter
	sectionSources := make([][3]chan uint64, len(bloom))
	sectionSinks := make([][3]chan []byte, len(bloom))
	for i, bits := range bloom {
		for j, bit := range bits {
			// All channels inherit the buffering of the upstream link
			sectionSources[i][j] = make(chan uint64, cap(source))
			sectionSinks[i][j] = make(chan []byte, cap(source))

			// NOTE(review): scheduler.run is defined elsewhere; presumably it answers
			// every section index sent on the source with a bit vector on the sink,
			// in order - confirm against scheduler.go.
			m.schedulers[bit].run(sectionSources[i][j], dist, sectionSinks[i][j], session.quit, &session.pend)
		}
	}

	process := make(chan *partialMatches, cap(source)) // entries from source are forwarded here after fetches have been initiated
	results := make(chan *partialMatches, cap(source))

	// Two goroutines are started below: one fanning sections out to the
	// schedulers, one collecting and merging the delivered bit vectors
	session.pend.Add(2)
	go func() {
		// Tear down the goroutine and terminate all source channels
		// (deferred calls run in reverse: scheduler sources are closed first,
		// then process, then the wait group is released)
		defer session.pend.Done()
		defer close(process)

		defer func() {
			for _, bloomSources := range sectionSources {
				for _, bitSource := range bloomSources {
					close(bitSource)
				}
			}
		}()
		// Read sections from the source channel and multiplex into all bit-schedulers
		for {
			select {
			case <-session.quit:
				return

			case subres, ok := <-source:
				// New subresult from previous link
				if !ok {
					return
				}
				// Multiplex the section index to all bit-schedulers
				for _, bloomSources := range sectionSources {
					for _, bitSource := range bloomSources {
						select {
						case <-session.quit:
							return
						case bitSource <- subres.section:
						}
					}
				}
				// Notify the processor that this section will become available
				select {
				case <-session.quit:
					return
				case process <- subres:
				}
			}
		}
	}()

	go func() {
		// Tear down the goroutine and terminate the final sink channel
		defer session.pend.Done()
		defer close(results)

		// Read the source notifications and collect the delivered results
		for {
			select {
			case <-session.quit:
				return

			case subres, ok := <-process:
				// Notified of a section being retrieved
				if !ok {
					return
				}
				// Gather all the sub-results and merge them together
				var orVector []byte
				for _, bloomSinks := range sectionSinks {
					var andVector []byte
					for _, bitSink := range bloomSinks {
						var data []byte
						select {
						case <-session.quit:
							return
						case data = <-bitSink:
						}
						if andVector == nil {
							// The first vector is copied so that the delivered data
							// itself is never modified by the AND operations below
							andVector = make([]byte, int(m.sectionSize/8))
							copy(andVector, data)
						} else {
							bitutil.ANDBytes(andVector, andVector, data)
						}
					}
					if orVector == nil {
						orVector = andVector
					} else {
						bitutil.ORBytes(orVector, orVector, andVector)
					}
				}

				// With no clauses at all (empty bloom) nothing was merged, so fall
				// back to an all-zero vector
				if orVector == nil {
					orVector = make([]byte, int(m.sectionSize/8))
				}
				// Intersect with the matches of the previous links, if any
				if subres.bitset != nil {
					bitutil.ANDBytes(orVector, orVector, subres.bitset)
				}
				// Only forward the section if the merged vector passes TestBytes
				// (presumably: at least one bit is still set)
				if bitutil.TestBytes(orVector) {
					select {
					case <-session.quit:
						return
					case results <- &partialMatches{subres.section, orVector}:
					}
				}
			}
		}
	}()
	return results
}

// distributor receives requests from the schedulers and queues them into a set
// of pending requests, which are assigned to retrievers wanting to fulfil them.
func (m *Matcher) distributor(dist chan *request, session *MatcherSession) {
	defer session.pend.Done()

	var (
		requests   = make(map[uint][]uint64) // Per-bit list of section requests, ordered by section number
		unallocs   = make(map[uint]struct{}) // Bits with pending requests but not allocated to any retriever
		retrievers chan chan uint            // Waiting retrievers (toggled to nil if unallocs is empty)
	)
	var (
		allocs   int            // Number of active allocations to handle graceful shutdown requests
		shutdown = session.quit // Shutdown request channel, will gracefully wait for pending requests
	)

	// assign is a helper method to try to assign a pending bit to an actively
	// listening servicer, or schedule it up for later when one arrives.
	assign := func(bit uint) {
		select {
		case fetcher := <-m.retrievers:
			allocs++
			fetcher <- bit
		default:
			// No retrievers active, start listening for new ones
			retrievers = m.retrievers
			unallocs[bit] = struct{}{}
		}
	}

	for {
		select {
		case <-shutdown:
			// Graceful shutdown requested, wait until all pending requests are honoured
			if allocs == 0 {
				return
			}
			// Stop selecting on the (closed) quit channel; the loop now ends once
			// the last allocation is delivered or the kill channel fires
			shutdown = nil

		case <-session.kill:
			// Pending requests not honoured in time, hard terminate
			return

		case req := <-dist:
			// New retrieval request arrived to be distributed to some fetcher process
			// (inserted at its sorted position to keep the queue ordered by section)
			queue := requests[req.bit]
			index := sort.Search(len(queue), func(i int) bool { return queue[i] >= req.section })
			requests[req.bit] = append(queue[:index], append([]uint64{req.section}, queue[index:]...)...)

			// If it's a new bit and we have waiting fetchers, allocate to them
			if len(queue) == 0 {
				assign(req.bit)
			}

		case fetcher := <-retrievers:
			// New retriever arrived, find the lowest section-ed bit to assign
			bit, best := uint(0), uint64(math.MaxUint64)
			for idx := range unallocs {
				if requests[idx][0] < best {
					bit, best = idx, requests[idx][0]
				}
			}
			// Stop tracking this bit (and alloc notifications if no more work is available)
			delete(unallocs, bit)
			if len(unallocs) == 0 {
				retrievers = nil
			}
			allocs++
			fetcher <- bit

		case fetcher := <-m.counters:
			// New task count request arrives, return number of items
			// (the bit index is read from the same channel the count is sent on)
			fetcher <- uint(len(requests[<-fetcher]))

		case fetcher := <-m.retrievals:
			// New fetcher waiting for tasks to retrieve, assign
			task := <-fetcher
			if want := len(task.Sections); want >= len(requests[task.Bit]) {
				// The fetcher can take everything queued for this bit
				task.Sections = requests[task.Bit]
				delete(requests, task.Bit)
			} else {
				// Hand out the first want sections and shift the rest to the front
				task.Sections = append(task.Sections[:0], requests[task.Bit][:want]...)
				requests[task.Bit] = append(requests[task.Bit][:0], requests[task.Bit][want:]...)
			}
			fetcher <- task

			// If anything was left unallocated, try to assign to someone else
			if len(requests[task.Bit]) > 0 {
				assign(task.Bit)
			}

		case result := <-m.deliveries:
			// New retrieval task response from fetcher, split out missing sections and
			// deliver complete ones
			var (
				sections = make([]uint64, 0, len(result.Sections))
				bitsets  = make([][]byte, 0, len(result.Bitsets))
				missing  = make([]uint64, 0, len(result.Sections))
			)
			for i, bitset := range result.Bitsets {
				// An empty bit vector marks a section that was not retrieved
				if len(bitset) == 0 {
					missing = append(missing, result.Sections[i])
					continue
				}
				sections = append(sections, result.Sections[i])
				bitsets = append(bitsets, bitset)
			}
			m.schedulers[result.Bit].deliver(sections, bitsets)
			allocs--

			// Reschedule missing sections and allocate bit if newly available
			if len(missing) > 0 {
				queue := requests[result.Bit]
				for _, section := range missing {
					index := sort.Search(len(queue), func(i int) bool { return queue[i] >= section })
					queue = append(queue[:index], append([]uint64{section}, queue[index:]...)...)
				}
				requests[result.Bit] = queue

				// Equal lengths mean the queue was empty before the re-insertion, so
				// the bit has no retriever assigned to it yet
				if len(queue) == len(missing) {
					assign(result.Bit)
				}
			}
			// If we're in the process of shutting down, terminate
			if allocs == 0 && shutdown == nil {
				return
			}
		}
	}
}

// MatcherSession is returned by a started matcher to be used as a terminator
// for the actively running matching operation.
513 type MatcherSession struct { 514 matcher *Matcher 515 516 closer sync.Once // Sync object to ensure we only ever close once 517 quit chan struct{} // Quit channel to request pipeline termination 518 kill chan struct{} // Term channel to signal non-graceful forced shutdown 519 520 ctx context.Context // Context used by the light client to abort filtering 521 err atomic.Value // Global error to track retrieval failures deep in the chain 522 523 pend sync.WaitGroup 524 } 525 526 // Close stops the matching process and waits for all subprocesses to terminate 527 // before returning. The timeout may be used for graceful shutdown, allowing the 528 // currently running retrievals to complete before this time. 529 func (s *MatcherSession) Close() { 530 s.closer.Do(func() { 531 // Signal termination and wait for all goroutines to tear down 532 close(s.quit) 533 time.AfterFunc(time.Second, func() { close(s.kill) }) 534 s.pend.Wait() 535 }) 536 } 537 538 // Error returns any failure encountered during the matching session. 539 func (s *MatcherSession) Error() error { 540 if err := s.err.Load(); err != nil { 541 return err.(error) 542 } 543 return nil 544 } 545 546 // AllocateRetrieval assigns a bloom bit index to a client process that can either 547 // immediately request and fetch the section contents assigned to this bit or wait 548 // a little while for more sections to be requested. 549 func (s *MatcherSession) AllocateRetrieval() (uint, bool) { 550 fetcher := make(chan uint) 551 552 select { 553 case <-s.quit: 554 return 0, false 555 case s.matcher.retrievers <- fetcher: 556 bit, ok := <-fetcher 557 return bit, ok 558 } 559 } 560 561 // PendingSections returns the number of pending section retrievals belonging to 562 // the given bloom bit index. 
563 func (s *MatcherSession) PendingSections(bit uint) int { 564 fetcher := make(chan uint) 565 566 select { 567 case <-s.quit: 568 return 0 569 case s.matcher.counters <- fetcher: 570 fetcher <- bit 571 return int(<-fetcher) 572 } 573 } 574 575 // AllocateSections assigns all or part of an already allocated bit-task queue 576 // to the requesting process. 577 func (s *MatcherSession) AllocateSections(bit uint, count int) []uint64 { 578 fetcher := make(chan *Retrieval) 579 580 select { 581 case <-s.quit: 582 return nil 583 case s.matcher.retrievals <- fetcher: 584 task := &Retrieval{ 585 Bit: bit, 586 Sections: make([]uint64, count), 587 } 588 fetcher <- task 589 return (<-fetcher).Sections 590 } 591 } 592 593 // DeliverSections delivers a batch of section bit-vectors for a specific bloom 594 // bit index to be injected into the processing pipeline. 595 func (s *MatcherSession) DeliverSections(bit uint, sections []uint64, bitsets [][]byte) { 596 select { 597 case <-s.kill: 598 return 599 case s.matcher.deliveries <- &Retrieval{Bit: bit, Sections: sections, Bitsets: bitsets}: 600 } 601 } 602 603 // Multiplex polls the matcher session for retrieval tasks and multiplexes it into 604 // the requested retrieval queue to be serviced together with other sessions. 605 // 606 // This method will block for the lifetime of the session. Even after termination 607 // of the session, any request in-flight need to be responded to! Empty responses 608 // are fine though in that case. 
609 func (s *MatcherSession) Multiplex(batch int, wait time.Duration, mux chan chan *Retrieval) { 610 for { 611 // Allocate a new bloom bit index to retrieve data for, stopping when done 612 bit, ok := s.AllocateRetrieval() 613 if !ok { 614 return 615 } 616 // Bit allocated, throttle a bit if we're below our batch limit 617 if s.PendingSections(bit) < batch { 618 select { 619 case <-s.quit: 620 // Session terminating, we can't meaningfully service, abort 621 s.AllocateSections(bit, 0) 622 s.DeliverSections(bit, []uint64{}, [][]byte{}) 623 return 624 625 case <-time.After(wait): 626 // Throttling up, fetch whatever's available 627 } 628 } 629 // Allocate as much as we can handle and request servicing 630 sections := s.AllocateSections(bit, batch) 631 request := make(chan *Retrieval) 632 633 select { 634 case <-s.quit: 635 // Session terminating, we can't meaningfully service, abort 636 s.DeliverSections(bit, sections, make([][]byte, len(sections))) 637 return 638 639 case mux <- request: 640 // Retrieval accepted, something must arrive before we're aborting 641 request <- &Retrieval{Bit: bit, Sections: sections, Context: s.ctx} 642 643 result := <-request 644 if result.Error != nil { 645 s.err.Store(result.Error) 646 s.Close() 647 } 648 s.DeliverSections(result.Bit, result.Sections, result.Bitsets) 649 } 650 } 651 }