github.com/etherbanking/go-etherbanking@v1.7.1-0.20181009210156-cf649bca5aba/core/bloombits/matcher.go

// Copyright 2017 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package bloombits

import (
	"bytes"
	"errors"
	"math"
	"sort"
	"sync"
	"sync/atomic"
	"time"

	"github.com/etherbanking/go-etherbanking/common/bitutil"
	"github.com/etherbanking/go-etherbanking/crypto"
)

// bloomIndexes represents the bit indexes inside the bloom filter that belong
// to some key.
type bloomIndexes [3]uint

// calcBloomIndexes returns the bloom filter bit indexes belonging to the given key.
func calcBloomIndexes(b []byte) bloomIndexes {
	b = crypto.Keccak256(b)

	var idxs bloomIndexes
	for i := 0; i < len(idxs); i++ {
		idxs[i] = (uint(b[2*i])<<8)&2047 + uint(b[2*i+1])
	}
	return idxs
}

// partialMatches with a non-nil vector represents a section in which some sub-
// matchers have already found potential matches. Subsequent sub-matchers will
// binary AND their matches with this vector. If vector is nil, it represents a
// section to be processed by the first sub-matcher.
type partialMatches struct {
	section uint64
	bitset  []byte
}

// Retrieval represents a request for retrieval task assignments for a given
// bit with the given number of fetch elements, or a response for such a request.
// It can also have the actual results set to be used as a delivery data struct.
type Retrieval struct {
	Bit      uint
	Sections []uint64
	Bitsets  [][]byte
}

// Matcher is a pipelined system of schedulers and logic matchers which perform
// binary AND/OR operations on the bit-streams, creating a stream of potential
// blocks to inspect for data content.
type Matcher struct {
	sectionSize uint64 // Size of the data batches to filter on

	filters    [][]bloomIndexes    // Filter the system is matching for
	schedulers map[uint]*scheduler // Retrieval schedulers for loading bloom bits

	retrievers chan chan uint       // Retriever processes waiting for bit allocations
	counters   chan chan uint       // Retriever processes waiting for task count reports
	retrievals chan chan *Retrieval // Retriever processes waiting for task allocations
	deliveries chan *Retrieval      // Retriever processes waiting for task response deliveries

	running uint32 // Atomic flag whether a session is live or not
}

// NewMatcher creates a new pipeline for retrieving bloom bit streams and doing
// address and topic filtering on them.
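//
// The filters argument is a list of filter groups: the pipeline AND-s the
// groups together, while the byte patterns inside a single group are OR-ed.
// As an illustrative sketch only (addrA, addrB, topicX and the 4096 section
// size are hypothetical placeholders, not values taken from this file), a
// query for "(addrA OR addrB) AND topicX" would be expressed as:
//
//	filters := [][][]byte{
//		{addrA, addrB}, // either address may match
//		{topicX},       // and this topic must also match
//	}
//	matcher := NewMatcher(4096, filters)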
func NewMatcher(sectionSize uint64, filters [][][]byte) *Matcher {
	// Create the matcher instance
	m := &Matcher{
		sectionSize: sectionSize,
		schedulers:  make(map[uint]*scheduler),
		retrievers:  make(chan chan uint),
		counters:    make(chan chan uint),
		retrievals:  make(chan chan *Retrieval),
		deliveries:  make(chan *Retrieval),
	}
	// Calculate the bloom bit indexes for the groups we're interested in
	m.filters = nil

	for _, filter := range filters {
		bloomBits := make([]bloomIndexes, len(filter))
		for i, clause := range filter {
			bloomBits[i] = calcBloomIndexes(clause)
		}
		m.filters = append(m.filters, bloomBits)
	}
	// For every bit, create a scheduler to load/download the bit vectors
	for _, bloomIndexLists := range m.filters {
		for _, bloomIndexList := range bloomIndexLists {
			for _, bloomIndex := range bloomIndexList {
				m.addScheduler(bloomIndex)
			}
		}
	}
	return m
}

// addScheduler adds a bit stream retrieval scheduler for the given bit index if
// one is not yet present. If the bit is already selected for filtering, the
// existing scheduler is reused.
func (m *Matcher) addScheduler(idx uint) {
	if _, ok := m.schedulers[idx]; ok {
		return
	}
	m.schedulers[idx] = newScheduler(idx)
}

// Start starts the matching process and returns a stream of bloom matches in
// a given range of blocks. If there are no more matches in the range, the result
// channel is closed.
func (m *Matcher) Start(begin, end uint64, results chan uint64) (*MatcherSession, error) {
	// Make sure we're not creating concurrent sessions
	if atomic.SwapUint32(&m.running, 1) == 1 {
		return nil, errors.New("matcher already running")
	}
	defer atomic.StoreUint32(&m.running, 0)

	// Initiate a new matching round
	session := &MatcherSession{
		matcher: m,
		quit:    make(chan struct{}),
		kill:    make(chan struct{}),
	}
	for _, scheduler := range m.schedulers {
		scheduler.reset()
	}
	sink := m.run(begin, end, cap(results), session)

	// Read the output from the result sink and deliver to the user
	session.pend.Add(1)
	go func() {
		defer session.pend.Done()
		defer close(results)

		for {
			select {
			case <-session.quit:
				return

			case res, ok := <-sink:
				// New match result found
				if !ok {
					return
				}
				// Calculate the first and last blocks of the section
				sectionStart := res.section * m.sectionSize

				first := sectionStart
				if begin > first {
					first = begin
				}
				last := sectionStart + m.sectionSize - 1
				if end < last {
					last = end
				}
				// Iterate over all the blocks in the section and return the matching ones
				for i := first; i <= last; i++ {
					// Skip the entire byte if no matches are found inside
					next := res.bitset[(i-sectionStart)/8]
					if next == 0 {
						i += 7
						continue
					}
					// Some bit is set, do the actual submatching
					if bit := 7 - i%8; next&(1<<bit) != 0 {
						select {
						case <-session.quit:
							return
						case results <- i:
						}
					}
				}
			}
		}
	}()
	return session, nil
}
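
// The sketch below is editorial and not part of the original file. It shows one
// way a caller might drive Start and drain the results channel, assuming that
// retrieval servicers (see Multiplex further down) are feeding the session
// concurrently. The runFilter name and the handleMatch helper are hypothetical
// placeholders.
//
//	func runFilter(matcher *Matcher, begin, end uint64, handleMatch func(uint64)) error {
//		// Buffered so the section feeder in run() can work ahead of the consumer.
//		results := make(chan uint64, 64)
//
//		session, err := matcher.Start(begin, end, results)
//		if err != nil {
//			return err
//		}
//		// Allow up to a second for in-flight retrievals to finish on shutdown.
//		defer session.Close(time.Second)
//
//		// The matcher closes the channel once the block range is exhausted.
//		for number := range results {
//			handleMatch(number)
//		}
//		return nil
//	}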

// run creates a daisy-chain of sub-matchers, one for the address set and one
// for each topic set, each sub-matcher receiving a section only if the previous
// ones have all found a potential match in one of the blocks of the section,
// then binary AND-ing its own matches and forwarding the result to the next one.
//
// The method starts feeding the section indexes into the first sub-matcher on a
// new goroutine and returns a sink channel receiving the results.
func (m *Matcher) run(begin, end uint64, buffer int, session *MatcherSession) chan *partialMatches {
	// Create the source channel and feed section indexes into it
	source := make(chan *partialMatches, buffer)

	session.pend.Add(1)
	go func() {
		defer session.pend.Done()
		defer close(source)

		for i := begin / m.sectionSize; i <= end/m.sectionSize; i++ {
			select {
			case <-session.quit:
				return
			case source <- &partialMatches{i, bytes.Repeat([]byte{0xff}, int(m.sectionSize/8))}:
			}
		}
	}()
	// Assemble the daisy-chained filtering pipeline
	next := source
	dist := make(chan *request, buffer)

	for _, bloom := range m.filters {
		next = m.subMatch(next, dist, bloom, session)
	}
	// Start the request distribution
	session.pend.Add(1)
	go m.distributor(dist, session)

	return next
}

// subMatch creates a sub-matcher that filters for a set of addresses or topics, binary OR-s those matches, then
// binary AND-s the result to the daisy-chain input (source) and forwards it to the daisy-chain output.
// The matches of each address/topic are calculated by fetching the given sections of the three bloom bit indexes belonging to
// that address/topic, and binary AND-ing those vectors together.
func (m *Matcher) subMatch(source chan *partialMatches, dist chan *request, bloom []bloomIndexes, session *MatcherSession) chan *partialMatches {
	// Start the concurrent schedulers for each bit required by the bloom filter
	sectionSources := make([][3]chan uint64, len(bloom))
	sectionSinks := make([][3]chan []byte, len(bloom))
	for i, bits := range bloom {
		for j, bit := range bits {
			sectionSources[i][j] = make(chan uint64, cap(source))
			sectionSinks[i][j] = make(chan []byte, cap(source))

			m.schedulers[bit].run(sectionSources[i][j], dist, sectionSinks[i][j], session.quit, &session.pend)
		}
	}

	process := make(chan *partialMatches, cap(source)) // entries from source are forwarded here after fetches have been initiated
	results := make(chan *partialMatches, cap(source))

	session.pend.Add(2)
	go func() {
		// Tear down the goroutine and terminate all source channels
		defer session.pend.Done()
		defer close(process)

		defer func() {
			for _, bloomSources := range sectionSources {
				for _, bitSource := range bloomSources {
					close(bitSource)
				}
			}
		}()
		// Read sections from the source channel and multiplex into all bit-schedulers
		for {
			select {
			case <-session.quit:
				return

			case subres, ok := <-source:
				// New subresult from previous link
				if !ok {
					return
				}
				// Multiplex the section index to all bit-schedulers
				for _, bloomSources := range sectionSources {
					for _, bitSource := range bloomSources {
						select {
						case <-session.quit:
							return
						case bitSource <- subres.section:
						}
					}
				}
				// Notify the processor that this section will become available
				select {
				case <-session.quit:
					return
				case process <- subres:
				}
			}
		}
	}()

	go func() {
		// Tear down the goroutine and terminate the final sink channel
		defer session.pend.Done()
		defer close(results)

		// Read the source notifications and collect the delivered results
		for {
			select {
			case <-session.quit:
				return

			case subres, ok := <-process:
				// Notified of a section being retrieved
				if !ok {
					return
				}
				// Gather all the sub-results and merge them together
				var orVector []byte
				for _, bloomSinks := range sectionSinks {
					var andVector []byte
					for _, bitSink := range bloomSinks {
						var data []byte
						select {
						case <-session.quit:
							return
						case data = <-bitSink:
						}
						if andVector == nil {
							andVector = make([]byte, int(m.sectionSize/8))
							copy(andVector, data)
						} else {
							bitutil.ANDBytes(andVector, andVector, data)
						}
					}
					if orVector == nil {
						orVector = andVector
					} else {
						bitutil.ORBytes(orVector, orVector, andVector)
					}
				}

				if orVector == nil {
					orVector = make([]byte, int(m.sectionSize/8))
				}
				if subres.bitset != nil {
					bitutil.ANDBytes(orVector, orVector, subres.bitset)
				}
				if bitutil.TestBytes(orVector) {
					select {
					case <-session.quit:
						return
					case results <- &partialMatches{subres.section, orVector}:
					}
				}
			}
		}
	}()
	return results
}

// distributor receives requests from the schedulers and queues them into a set
// of pending requests, which are assigned to retrievers wanting to fulfil them.
func (m *Matcher) distributor(dist chan *request, session *MatcherSession) {
	defer session.pend.Done()

	var (
		requests   = make(map[uint][]uint64) // Per-bit list of section requests, ordered by section number
		unallocs   = make(map[uint]struct{}) // Bits with pending requests but not allocated to any retriever
		retrievers chan chan uint            // Waiting retrievers (toggled to nil if unallocs is empty)
	)
	var (
		allocs   int                  // Number of active allocations to handle graceful shutdown requests
		shutdown = session.quit       // Shutdown request channel, will gracefully wait for pending requests
	)

	// assign is a helper method to try to assign a pending bit to an actively
	// listening servicer, or schedule it up for later when one arrives.
	assign := func(bit uint) {
		select {
		case fetcher := <-m.retrievers:
			allocs++
			fetcher <- bit
		default:
			// No retrievers active, start listening for new ones
			retrievers = m.retrievers
			unallocs[bit] = struct{}{}
		}
	}

	for {
		select {
		case <-shutdown:
			// Graceful shutdown requested, wait until all pending requests are honoured
			if allocs == 0 {
				return
			}
			shutdown = nil

		case <-session.kill:
			// Pending requests not honoured in time, hard terminate
			return

		case req := <-dist:
			// New retrieval request arrived to be distributed to some fetcher process
			queue := requests[req.bit]
			index := sort.Search(len(queue), func(i int) bool { return queue[i] >= req.section })
			requests[req.bit] = append(queue[:index], append([]uint64{req.section}, queue[index:]...)...)

			// If it's a new bit and we have waiting fetchers, allocate to them
			if len(queue) == 0 {
				assign(req.bit)
			}

		case fetcher := <-retrievers:
			// New retriever arrived, find the lowest section-ed bit to assign
			bit, best := uint(0), uint64(math.MaxUint64)
			for idx := range unallocs {
				if requests[idx][0] < best {
					bit, best = idx, requests[idx][0]
				}
			}
			// Stop tracking this bit (and alloc notifications if no more work is available)
			delete(unallocs, bit)
			if len(unallocs) == 0 {
				retrievers = nil
			}
			allocs++
			fetcher <- bit

		case fetcher := <-m.counters:
			// New task count request arrives, return number of items
			fetcher <- uint(len(requests[<-fetcher]))

		case fetcher := <-m.retrievals:
			// New fetcher waiting for tasks to retrieve, assign
			task := <-fetcher
			if want := len(task.Sections); want >= len(requests[task.Bit]) {
				task.Sections = requests[task.Bit]
				delete(requests, task.Bit)
			} else {
				task.Sections = append(task.Sections[:0], requests[task.Bit][:want]...)
				requests[task.Bit] = append(requests[task.Bit][:0], requests[task.Bit][want:]...)
			}
			fetcher <- task

			// If anything was left unallocated, try to assign to someone else
			if len(requests[task.Bit]) > 0 {
				assign(task.Bit)
			}

		case result := <-m.deliveries:
			// New retrieval task response from fetcher, split out missing sections and
			// deliver complete ones
			var (
				sections = make([]uint64, 0, len(result.Sections))
				bitsets  = make([][]byte, 0, len(result.Bitsets))
				missing  = make([]uint64, 0, len(result.Sections))
			)
			for i, bitset := range result.Bitsets {
				if len(bitset) == 0 {
					missing = append(missing, result.Sections[i])
					continue
				}
				sections = append(sections, result.Sections[i])
				bitsets = append(bitsets, bitset)
			}
			m.schedulers[result.Bit].deliver(sections, bitsets)
			allocs--

			// Reschedule missing sections and allocate bit if newly available
			if len(missing) > 0 {
				queue := requests[result.Bit]
				for _, section := range missing {
					index := sort.Search(len(queue), func(i int) bool { return queue[i] >= section })
					queue = append(queue[:index], append([]uint64{section}, queue[index:]...)...)
				}
				requests[result.Bit] = queue

				if len(queue) == len(missing) {
					assign(result.Bit)
				}
			}
			// If we're in the process of shutting down, terminate
			if allocs == 0 && shutdown == nil {
				return
			}
		}
	}
}

// MatcherSession is returned by a started matcher to be used as a terminator
// for the actively running matching operation.
type MatcherSession struct {
	matcher *Matcher

	quit chan struct{} // Quit channel to request pipeline termination
	kill chan struct{} // Term channel to signal non-graceful forced shutdown
	pend sync.WaitGroup
}

// Close stops the matching process and waits for all subprocesses to terminate
// before returning. The timeout may be used for graceful shutdown, allowing the
// currently running retrievals to complete before this time.
func (s *MatcherSession) Close(timeout time.Duration) {
	// Bail out if the matcher is not running
	select {
	case <-s.quit:
		return
	default:
	}
	// Signal termination and wait for all goroutines to tear down
	close(s.quit)
	time.AfterFunc(timeout, func() { close(s.kill) })
	s.pend.Wait()
}

// AllocateRetrieval assigns a bloom bit index to a client process that can either
// immediately request and fetch the section contents assigned to this bit or wait
// a little while for more sections to be requested.
func (s *MatcherSession) AllocateRetrieval() (uint, bool) {
	fetcher := make(chan uint)

	select {
	case <-s.quit:
		return 0, false
	case s.matcher.retrievers <- fetcher:
		bit, ok := <-fetcher
		return bit, ok
	}
}

// PendingSections returns the number of pending section retrievals belonging to
// the given bloom bit index.
func (s *MatcherSession) PendingSections(bit uint) int {
	fetcher := make(chan uint)

	select {
	case <-s.quit:
		return 0
	case s.matcher.counters <- fetcher:
		fetcher <- bit
		return int(<-fetcher)
	}
}

// AllocateSections assigns all or part of an already allocated bit-task queue
// to the requesting process.
func (s *MatcherSession) AllocateSections(bit uint, count int) []uint64 {
	fetcher := make(chan *Retrieval)

	select {
	case <-s.quit:
		return nil
	case s.matcher.retrievals <- fetcher:
		task := &Retrieval{
			Bit:      bit,
			Sections: make([]uint64, count),
		}
		fetcher <- task
		return (<-fetcher).Sections
	}
}

// DeliverSections delivers a batch of section bit-vectors for a specific bloom
// bit index to be injected into the processing pipeline.
func (s *MatcherSession) DeliverSections(bit uint, sections []uint64, bitsets [][]byte) {
	select {
	case <-s.kill:
		return
	case s.matcher.deliveries <- &Retrieval{Bit: bit, Sections: sections, Bitsets: bitsets}:
	}
}

// Multiplex polls the matcher session for retrieval tasks and multiplexes them into
// the requested retrieval queue to be serviced together with other sessions.
//
// This method will block for the lifetime of the session. Even after termination
// of the session, any in-flight requests need to be responded to! Empty responses
// are fine though in that case.
func (s *MatcherSession) Multiplex(batch int, wait time.Duration, mux chan chan *Retrieval) {
	for {
		// Allocate a new bloom bit index to retrieve data for, stopping when done
		bit, ok := s.AllocateRetrieval()
		if !ok {
			return
		}
		// Bit allocated, throttle a bit if we're below our batch limit
		if s.PendingSections(bit) < batch {
			select {
			case <-s.quit:
				// Session terminating, we can't meaningfully service, abort
				s.AllocateSections(bit, 0)
				s.DeliverSections(bit, []uint64{}, [][]byte{})
				return

			case <-time.After(wait):
				// Throttling up, fetch whatever's available
			}
		}
		// Allocate as much as we can handle and request servicing
		sections := s.AllocateSections(bit, batch)
		request := make(chan *Retrieval)

		select {
		case <-s.quit:
			// Session terminating, we can't meaningfully service, abort
			s.DeliverSections(bit, sections, make([][]byte, len(sections)))
			return

		case mux <- request:
			// Retrieval accepted, something must arrive before we're aborting
			request <- &Retrieval{Bit: bit, Sections: sections}

			result := <-request
			s.DeliverSections(result.Bit, result.Sections, result.Bitsets)
		}
	}
}
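
// The sketch below is editorial and not part of the original file. It shows one
// way a retrieval backend might service the mux channel that Multiplex feeds,
// following the request/response handshake above. The serveBloomBits name, the
// shutdown channel and the bitsFor lookup helper are hypothetical placeholders.
//
//	func serveBloomBits(shutdown chan struct{}, requests chan chan *Retrieval,
//		bitsFor func(bit uint, section uint64) []byte) {
//		for {
//			select {
//			case <-shutdown:
//				return
//
//			case request := <-requests:
//				// Receive the task assignment: one bit index and a batch of sections.
//				task := <-request
//
//				// Fill in the bit vectors; an empty bitset marks the section as
//				// missing, so the distributor will re-queue it.
//				task.Bitsets = make([][]byte, len(task.Sections))
//				for i, section := range task.Sections {
//					task.Bitsets[i] = bitsFor(task.Bit, section)
//				}
//				// Hand the filled-in task back; Multiplex delivers it via DeliverSections.
//				request <- task
//			}
//		}
//	}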