github.com/sberex/go-sberex@v1.8.2-0.20181113200658-ed96ac38f7d7/core/bloombits/matcher.go

// This file is part of the go-sberex library. The go-sberex library is
// free software: you can redistribute it and/or modify it under the terms
// of the GNU Lesser General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// The go-sberex library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License <http://www.gnu.org/licenses/> for more details.

package bloombits

import (
	"bytes"
	"context"
	"errors"
	"math"
	"sort"
	"sync"
	"sync/atomic"
	"time"

	"github.com/Sberex/go-sberex/common/bitutil"
	"github.com/Sberex/go-sberex/crypto"
)

// bloomIndexes represents the bit indexes inside the bloom filter that belong
// to some key.
type bloomIndexes [3]uint

// calcBloomIndexes returns the bloom filter bit indexes belonging to the given key.
func calcBloomIndexes(b []byte) bloomIndexes {
	b = crypto.Keccak256(b)

	var idxs bloomIndexes
	for i := 0; i < len(idxs); i++ {
		idxs[i] = (uint(b[2*i])<<8)&2047 + uint(b[2*i+1])
	}
	return idxs
}

// partialMatches with a non-nil vector represents a section in which some sub-
// matchers have already found potential matches. Subsequent sub-matchers will
// binary AND their matches with this vector. If vector is nil, it represents a
// section to be processed by the first sub-matcher.
type partialMatches struct {
	section uint64
	bitset  []byte
}

// Retrieval represents a request for retrieval task assignments for a given
// bit with the given number of fetch elements, or a response for such a request.
// It can also have the actual results set to be used as a delivery data struct.
//
// The context and error fields are used by the light client to terminate matching
// early if an error is encountered on some path of the pipeline.
type Retrieval struct {
	Bit      uint
	Sections []uint64
	Bitsets  [][]byte

	Context context.Context
	Error   error
}

// Matcher is a pipelined system of schedulers and logic matchers which perform
// binary AND/OR operations on the bit-streams, creating a stream of potential
// blocks to inspect for data content.
type Matcher struct {
	sectionSize uint64 // Size of the data batches to filter on

	filters    [][]bloomIndexes    // Filter the system is matching for
	schedulers map[uint]*scheduler // Retrieval schedulers for loading bloom bits

	retrievers chan chan uint       // Retriever processes waiting for bit allocations
	counters   chan chan uint       // Retriever processes waiting for task count reports
	retrievals chan chan *Retrieval // Retriever processes waiting for task allocations
	deliveries chan *Retrieval      // Retriever processes waiting for task response deliveries

	running uint32 // Atomic flag whether a session is live or not
}
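
// Illustrative note (not part of the original source): a filter for the
// hypothetical address clauses addr1, addr2 (both []byte) and a single topic
// clause topic1 would be passed to NewMatcher roughly as
//
//	filters := [][][]byte{
//		{addr1, addr2}, // clauses within a group are OR-ed together
//		{topic1},       // separate groups are AND-ed together
//	}
//
// calcBloomIndexes reduces every clause to its three bloom bit indexes, which
// is what the filters field of Matcher ultimately stores.
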
// NewMatcher creates a new pipeline for retrieving bloom bit streams and doing
// address and topic filtering on them. Setting a filter component to `nil` is
// allowed and will result in that filter rule being skipped (OR 0x11...1).
func NewMatcher(sectionSize uint64, filters [][][]byte) *Matcher {
	// Create the matcher instance
	m := &Matcher{
		sectionSize: sectionSize,
		schedulers:  make(map[uint]*scheduler),
		retrievers:  make(chan chan uint),
		counters:    make(chan chan uint),
		retrievals:  make(chan chan *Retrieval),
		deliveries:  make(chan *Retrieval),
	}
	// Calculate the bloom bit indexes for the groups we're interested in
	m.filters = nil

	for _, filter := range filters {
		// Gather the bit indexes of the filter rule, special casing the nil filter
		if len(filter) == 0 {
			continue
		}
		bloomBits := make([]bloomIndexes, len(filter))
		for i, clause := range filter {
			if clause == nil {
				bloomBits = nil
				break
			}
			bloomBits[i] = calcBloomIndexes(clause)
		}
		// Accumulate the filter rules if no nil rule was within
		if bloomBits != nil {
			m.filters = append(m.filters, bloomBits)
		}
	}
	// For every bit, create a scheduler to load/download the bit vectors
	for _, bloomIndexLists := range m.filters {
		for _, bloomIndexList := range bloomIndexLists {
			for _, bloomIndex := range bloomIndexList {
				m.addScheduler(bloomIndex)
			}
		}
	}
	return m
}

// addScheduler adds a bit stream retrieval scheduler for the given bit index if
// one does not exist yet. If the bit is already selected for filtering, the
// existing scheduler can be used.
func (m *Matcher) addScheduler(idx uint) {
	if _, ok := m.schedulers[idx]; ok {
		return
	}
	m.schedulers[idx] = newScheduler(idx)
}

// Start starts the matching process and returns a stream of bloom matches in
// a given range of blocks. If there are no more matches in the range, the result
// channel is closed.
func (m *Matcher) Start(ctx context.Context, begin, end uint64, results chan uint64) (*MatcherSession, error) {
	// Make sure we're not creating concurrent sessions
	if atomic.SwapUint32(&m.running, 1) == 1 {
		return nil, errors.New("matcher already running")
	}
	defer atomic.StoreUint32(&m.running, 0)

	// Initiate a new matching round
	session := &MatcherSession{
		matcher: m,
		quit:    make(chan struct{}),
		kill:    make(chan struct{}),
		ctx:     ctx,
	}
	for _, scheduler := range m.schedulers {
		scheduler.reset()
	}
	sink := m.run(begin, end, cap(results), session)

	// Read the output from the result sink and deliver to the user
	session.pend.Add(1)
	go func() {
		defer session.pend.Done()
		defer close(results)

		for {
			select {
			case <-session.quit:
				return

			case res, ok := <-sink:
				// New match result found
				if !ok {
					return
				}
				// Calculate the first and last blocks of the section
				sectionStart := res.section * m.sectionSize

				first := sectionStart
				if begin > first {
					first = begin
				}
				last := sectionStart + m.sectionSize - 1
				if end < last {
					last = end
				}
				// Iterate over all the blocks in the section and return the matching ones
				for i := first; i <= last; i++ {
					// Skip the entire byte if no matches are found inside (and we're processing an entire byte!)
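					// Note (added for clarity): res.bitset packs one bit per block of the
					// section, most significant bit first, so block i maps to byte
					// (i-sectionStart)/8 and bit 7-i%8 of that byte (see the check below).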
					next := res.bitset[(i-sectionStart)/8]
					if next == 0 {
						if i%8 == 0 {
							i += 7
						}
						continue
					}
					// Some bit is set, do the actual submatching
					if bit := 7 - i%8; next&(1<<bit) != 0 {
						select {
						case <-session.quit:
							return
						case results <- i:
						}
					}
				}
			}
		}
	}()
	return session, nil
}

// run creates a daisy-chain of sub-matchers, one for the address set and one
// for each topic set, each sub-matcher receiving a section only if the previous
// ones have all found a potential match in one of the blocks of the section,
// then binary AND-ing its own matches and forwarding the result to the next one.
//
// The method starts feeding the section indexes into the first sub-matcher on a
// new goroutine and returns a sink channel receiving the results.
func (m *Matcher) run(begin, end uint64, buffer int, session *MatcherSession) chan *partialMatches {
	// Create the source channel and feed section indexes into it
	source := make(chan *partialMatches, buffer)

	session.pend.Add(1)
	go func() {
		defer session.pend.Done()
		defer close(source)

		for i := begin / m.sectionSize; i <= end/m.sectionSize; i++ {
			select {
			case <-session.quit:
				return
			case source <- &partialMatches{i, bytes.Repeat([]byte{0xff}, int(m.sectionSize/8))}:
			}
		}
	}()
	// Assemble the daisy-chained filtering pipeline
	next := source
	dist := make(chan *request, buffer)

	for _, bloom := range m.filters {
		next = m.subMatch(next, dist, bloom, session)
	}
	// Start the request distribution
	session.pend.Add(1)
	go m.distributor(dist, session)

	return next
}
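
// Illustrative note (not part of the original source): for a matcher built
// from filter groups G1..Gn, run assembles the pipeline
//
//	source -> subMatch(G1) -> subMatch(G2) -> ... -> subMatch(Gn) -> sink
//
// with every subMatch stage pushing its bit retrieval requests into the shared
// dist channel that the distributor drains.
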
// subMatch creates a sub-matcher that filters for a set of addresses or topics, binary OR-s those matches, then
// binary AND-s the result to the daisy-chain input (source) and forwards it to the daisy-chain output.
// The matches of each address/topic are calculated by fetching the given sections of the three bloom bit indexes belonging to
// that address/topic, and binary AND-ing those vectors together.
func (m *Matcher) subMatch(source chan *partialMatches, dist chan *request, bloom []bloomIndexes, session *MatcherSession) chan *partialMatches {
	// Start the concurrent schedulers for each bit required by the bloom filter
	sectionSources := make([][3]chan uint64, len(bloom))
	sectionSinks := make([][3]chan []byte, len(bloom))
	for i, bits := range bloom {
		for j, bit := range bits {
			sectionSources[i][j] = make(chan uint64, cap(source))
			sectionSinks[i][j] = make(chan []byte, cap(source))

			m.schedulers[bit].run(sectionSources[i][j], dist, sectionSinks[i][j], session.quit, &session.pend)
		}
	}

	process := make(chan *partialMatches, cap(source)) // entries from source are forwarded here after fetches have been initiated
	results := make(chan *partialMatches, cap(source))

	session.pend.Add(2)
	go func() {
		// Tear down the goroutine and terminate all source channels
		defer session.pend.Done()
		defer close(process)

		defer func() {
			for _, bloomSources := range sectionSources {
				for _, bitSource := range bloomSources {
					close(bitSource)
				}
			}
		}()
		// Read sections from the source channel and multiplex into all bit-schedulers
		for {
			select {
			case <-session.quit:
				return

			case subres, ok := <-source:
				// New subresult from previous link
				if !ok {
					return
				}
				// Multiplex the section index to all bit-schedulers
				for _, bloomSources := range sectionSources {
					for _, bitSource := range bloomSources {
						select {
						case <-session.quit:
							return
						case bitSource <- subres.section:
						}
					}
				}
				// Notify the processor that this section will become available
				select {
				case <-session.quit:
					return
				case process <- subres:
				}
			}
		}
	}()

	go func() {
		// Tear down the goroutine and terminate the final sink channel
		defer session.pend.Done()
		defer close(results)

		// Read the source notifications and collect the delivered results
		for {
			select {
			case <-session.quit:
				return

			case subres, ok := <-process:
				// Notified of a section being retrieved
				if !ok {
					return
				}
				// Gather all the sub-results and merge them together
				var orVector []byte
				for _, bloomSinks := range sectionSinks {
					var andVector []byte
					for _, bitSink := range bloomSinks {
						var data []byte
						select {
						case <-session.quit:
							return
						case data = <-bitSink:
						}
						if andVector == nil {
							andVector = make([]byte, int(m.sectionSize/8))
							copy(andVector, data)
						} else {
							bitutil.ANDBytes(andVector, andVector, data)
						}
					}
					if orVector == nil {
						orVector = andVector
					} else {
						bitutil.ORBytes(orVector, orVector, andVector)
					}
				}

				if orVector == nil {
					orVector = make([]byte, int(m.sectionSize/8))
				}
				if subres.bitset != nil {
					bitutil.ANDBytes(orVector, orVector, subres.bitset)
				}
				if bitutil.TestBytes(orVector) {
					select {
					case <-session.quit:
						return
					case results <- &partialMatches{subres.section, orVector}:
					}
				}
			}
		}
	}()
	return results
}
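
// Illustrative note (not part of the original source): for a group with
// clauses C1..Ck, the collector goroutine above computes, per section,
//
//	or = (C1[0] & C1[1] & C1[2]) | ... | (Ck[0] & Ck[1] & Ck[2])
//
// over the fetched bit vectors (Cj[i] being the vector for the j-th clause's
// i-th bloom bit), then ANDs it with the incoming partial-match bitset and
// only forwards sections that still have at least one bit set.
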
// distributor receives requests from the schedulers and queues them into a set
// of pending requests, which are assigned to retrievers wanting to fulfil them.
func (m *Matcher) distributor(dist chan *request, session *MatcherSession) {
	defer session.pend.Done()

	var (
		requests   = make(map[uint][]uint64) // Per-bit list of section requests, ordered by section number
		unallocs   = make(map[uint]struct{}) // Bits with pending requests but not allocated to any retriever
		retrievers chan chan uint            // Waiting retrievers (toggled to nil if unallocs is empty)
	)
	var (
		allocs   int            // Number of active allocations to handle graceful shutdown requests
		shutdown = session.quit // Shutdown request channel, will gracefully wait for pending requests
	)

	// assign is a helper method to try to assign a pending bit to an actively
	// listening servicer, or schedule it up for later when one arrives.
	assign := func(bit uint) {
		select {
		case fetcher := <-m.retrievers:
			allocs++
			fetcher <- bit
		default:
			// No retrievers active, start listening for new ones
			retrievers = m.retrievers
			unallocs[bit] = struct{}{}
		}
	}

	for {
		select {
		case <-shutdown:
			// Graceful shutdown requested, wait until all pending requests are honoured
			if allocs == 0 {
				return
			}
			shutdown = nil

		case <-session.kill:
			// Pending requests not honoured in time, hard terminate
			return

		case req := <-dist:
			// New retrieval request arrived to be distributed to some fetcher process
			queue := requests[req.bit]
			index := sort.Search(len(queue), func(i int) bool { return queue[i] >= req.section })
			requests[req.bit] = append(queue[:index], append([]uint64{req.section}, queue[index:]...)...)

			// If it's a new bit and we have waiting fetchers, allocate to them
			if len(queue) == 0 {
				assign(req.bit)
			}

		case fetcher := <-retrievers:
			// New retriever arrived, find the lowest section-ed bit to assign
			bit, best := uint(0), uint64(math.MaxUint64)
			for idx := range unallocs {
				if requests[idx][0] < best {
					bit, best = idx, requests[idx][0]
				}
			}
			// Stop tracking this bit (and alloc notifications if no more work is available)
			delete(unallocs, bit)
			if len(unallocs) == 0 {
				retrievers = nil
			}
			allocs++
			fetcher <- bit

		case fetcher := <-m.counters:
			// New task count request arrives, return number of items
			fetcher <- uint(len(requests[<-fetcher]))

		case fetcher := <-m.retrievals:
			// New fetcher waiting for tasks to retrieve, assign
			task := <-fetcher
			if want := len(task.Sections); want >= len(requests[task.Bit]) {
				task.Sections = requests[task.Bit]
				delete(requests, task.Bit)
			} else {
				task.Sections = append(task.Sections[:0], requests[task.Bit][:want]...)
				requests[task.Bit] = append(requests[task.Bit][:0], requests[task.Bit][want:]...)
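				// Note (added for clarity): the re-slice above copies the remaining
				// sections to the front of the same backing array, so only the first
				// `want` entries are handed out and the rest stay queued for later fetchers.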
			}
			fetcher <- task

			// If anything was left unallocated, try to assign to someone else
			if len(requests[task.Bit]) > 0 {
				assign(task.Bit)
			}

		case result := <-m.deliveries:
			// New retrieval task response from fetcher, split out missing sections and
			// deliver complete ones
			var (
				sections = make([]uint64, 0, len(result.Sections))
				bitsets  = make([][]byte, 0, len(result.Bitsets))
				missing  = make([]uint64, 0, len(result.Sections))
			)
			for i, bitset := range result.Bitsets {
				if len(bitset) == 0 {
					missing = append(missing, result.Sections[i])
					continue
				}
				sections = append(sections, result.Sections[i])
				bitsets = append(bitsets, bitset)
			}
			m.schedulers[result.Bit].deliver(sections, bitsets)
			allocs--

			// Reschedule missing sections and allocate bit if newly available
			if len(missing) > 0 {
				queue := requests[result.Bit]
				for _, section := range missing {
					index := sort.Search(len(queue), func(i int) bool { return queue[i] >= section })
					queue = append(queue[:index], append([]uint64{section}, queue[index:]...)...)
				}
				requests[result.Bit] = queue

				if len(queue) == len(missing) {
					assign(result.Bit)
				}
			}
			// If we're in the process of shutting down, terminate
			if allocs == 0 && shutdown == nil {
				return
			}
		}
	}
}

// MatcherSession is returned by a started matcher to be used as a terminator
// for the actively running matching operation.
type MatcherSession struct {
	matcher *Matcher

	closer sync.Once     // Sync object to ensure we only ever close once
	quit   chan struct{} // Quit channel to request pipeline termination
	kill   chan struct{} // Term channel to signal non-graceful forced shutdown

	ctx context.Context // Context used by the light client to abort filtering
	err atomic.Value    // Global error to track retrieval failures deep in the chain

	pend sync.WaitGroup
}

// Close stops the matching process and waits for all subprocesses to terminate
// before returning. The timeout may be used for graceful shutdown, allowing the
// currently running retrievals to complete before this time.
func (s *MatcherSession) Close() {
	s.closer.Do(func() {
		// Signal termination and wait for all goroutines to tear down
		close(s.quit)
		time.AfterFunc(time.Second, func() { close(s.kill) })
		s.pend.Wait()
	})
}

// Error returns any failure encountered during the matching session.
func (s *MatcherSession) Error() error {
	if err := s.err.Load(); err != nil {
		return err.(error)
	}
	return nil
}

// AllocateRetrieval assigns a bloom bit index to a client process that can either
// immediately request and fetch the section contents assigned to this bit or wait
// a little while for more sections to be requested.
func (s *MatcherSession) AllocateRetrieval() (uint, bool) {
	fetcher := make(chan uint)

	select {
	case <-s.quit:
		return 0, false
	case s.matcher.retrievers <- fetcher:
		bit, ok := <-fetcher
		return bit, ok
	}
}
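
// Illustrative note (not part of the original source): AllocateRetrieval above
// and PendingSections/AllocateSections below all follow the same handshake with
// the distributor: create a private reply channel, hand it over via one of the
// matcher's request channels, then exchange the actual payload on that private
// channel. A minimal servicer loop built on these calls is sketched at the end
// of this file.
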
// PendingSections returns the number of pending section retrievals belonging to
// the given bloom bit index.
func (s *MatcherSession) PendingSections(bit uint) int {
	fetcher := make(chan uint)

	select {
	case <-s.quit:
		return 0
	case s.matcher.counters <- fetcher:
		fetcher <- bit
		return int(<-fetcher)
	}
}

// AllocateSections assigns all or part of an already allocated bit-task queue
// to the requesting process.
func (s *MatcherSession) AllocateSections(bit uint, count int) []uint64 {
	fetcher := make(chan *Retrieval)

	select {
	case <-s.quit:
		return nil
	case s.matcher.retrievals <- fetcher:
		task := &Retrieval{
			Bit:      bit,
			Sections: make([]uint64, count),
		}
		fetcher <- task
		return (<-fetcher).Sections
	}
}

// DeliverSections delivers a batch of section bit-vectors for a specific bloom
// bit index to be injected into the processing pipeline.
func (s *MatcherSession) DeliverSections(bit uint, sections []uint64, bitsets [][]byte) {
	select {
	case <-s.kill:
		return
	case s.matcher.deliveries <- &Retrieval{Bit: bit, Sections: sections, Bitsets: bitsets}:
	}
}

// Multiplex polls the matcher session for retrieval tasks and multiplexes them
// into the requested retrieval queue to be serviced together with other sessions.
//
// This method will block for the lifetime of the session. Even after termination
// of the session, any requests in-flight need to be responded to! Empty responses
// are fine though in that case.
func (s *MatcherSession) Multiplex(batch int, wait time.Duration, mux chan chan *Retrieval) {
	for {
		// Allocate a new bloom bit index to retrieve data for, stopping when done
		bit, ok := s.AllocateRetrieval()
		if !ok {
			return
		}
		// Bit allocated, throttle a bit if we're below our batch limit
		if s.PendingSections(bit) < batch {
			select {
			case <-s.quit:
				// Session terminating, we can't meaningfully service, abort
				s.AllocateSections(bit, 0)
				s.DeliverSections(bit, []uint64{}, [][]byte{})
				return

			case <-time.After(wait):
				// Throttling up, fetch whatever's available
			}
		}
		// Allocate as much as we can handle and request servicing
		sections := s.AllocateSections(bit, batch)
		request := make(chan *Retrieval)

		select {
		case <-s.quit:
			// Session terminating, we can't meaningfully service, abort
			s.DeliverSections(bit, sections, make([][]byte, len(sections)))
			return

		case mux <- request:
			// Retrieval accepted, something must arrive before we're aborting
			request <- &Retrieval{Bit: bit, Sections: sections, Context: s.ctx}

			result := <-request
			if result.Error != nil {
				s.err.Store(result.Error)
				s.Close()
			}
			s.DeliverSections(result.Bit, result.Sections, result.Bitsets)
		}
	}
}
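
// Illustrative usage sketch (not part of the original source; names such as
// fetchBloomBits are hypothetical). A caller would start a session, let
// Multiplex feed retrieval requests into a shared mux channel, service each
// request with the actual bloom bit data, and read candidate block numbers
// from the results channel:
//
//	results := make(chan uint64, 64)
//	session, err := matcher.Start(ctx, begin, end, results)
//	if err != nil {
//		return err
//	}
//	defer session.Close()
//
//	mux := make(chan chan *Retrieval)
//	go session.Multiplex(16, 100*time.Millisecond, mux)
//	go func() {
//		for request := range mux {
//			task := <-request
//			task.Bitsets = fetchBloomBits(task.Bit, task.Sections) // hypothetical data source
//			request <- task
//		}
//	}()
//	for number := range results {
//		// number is a potentially matching block that still needs verification
//	}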