github.com/4000d/go-ethereum@v1.8.2-0.20180223170251-423c8bb1d821/core/chain_indexer.go

// Copyright 2017 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package core

import (
	"encoding/binary"
	"fmt"
	"sync"
	"sync/atomic"
	"time"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/core/types"
	"github.com/ethereum/go-ethereum/ethdb"
	"github.com/ethereum/go-ethereum/event"
	"github.com/ethereum/go-ethereum/log"
)

// ChainIndexerBackend defines the methods needed to process chain segments in
// the background and write the segment results into the database. These can be
// used to create filter blooms or CHTs.
type ChainIndexerBackend interface {
	// Reset initiates the processing of a new chain segment, potentially terminating
	// any partially completed operations (in case of a reorg).
	Reset(section uint64, prevHead common.Hash) error

	// Process crunches through the next header in the chain segment. The caller
	// will ensure a sequential order of headers.
	Process(header *types.Header)

	// Commit finalizes the section metadata and stores it into the database.
	Commit() error
}

// ChainIndexerChain interface is used for connecting the indexer to a blockchain.
type ChainIndexerChain interface {
	// CurrentHeader retrieves the latest locally known header.
	CurrentHeader() *types.Header

	// SubscribeChainEvent subscribes to new head header notifications.
	SubscribeChainEvent(ch chan<- ChainEvent) event.Subscription
}
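// As an illustrative sketch (not part of the original file), a minimal no-op
// ChainIndexerBackend could look like the following; the name noopBackend and
// its fields are hypothetical:
//
//	type noopBackend struct {
//		section uint64
//		head    common.Hash
//	}
//
//	func (b *noopBackend) Reset(section uint64, prevHead common.Hash) error {
//		b.section, b.head = section, prevHead // start a fresh segment
//		return nil
//	}
//
//	func (b *noopBackend) Process(header *types.Header) {
//		b.head = header.Hash() // remember the last header seen
//	}
//
//	func (b *noopBackend) Commit() error {
//		return nil // nothing to persist in this sketch
//	}
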
// ChainIndexer does a post-processing job for equally sized sections of the
// canonical chain (like BloomBits and CHT structures). A ChainIndexer is
// connected to the blockchain through the event system by starting a
// ChainEventLoop in a goroutine.
//
// Further child ChainIndexers can be added which use the output of the parent
// section indexer. These child indexers receive new head notifications only
// after an entire section has been finished or in case of rollbacks that might
// affect already finished sections.
type ChainIndexer struct {
	chainDb  ethdb.Database      // Chain database to index the data from
	indexDb  ethdb.Database      // Prefixed table-view of the db to write index metadata into
	backend  ChainIndexerBackend // Background processor generating the index data content
	children []*ChainIndexer     // Child indexers to cascade chain updates to

	active uint32          // Flag whether the event loop was started
	update chan struct{}   // Notification channel that headers should be processed
	quit   chan chan error // Quit channel to tear down running goroutines

	sectionSize uint64 // Number of blocks in a single chain segment to process
	confirmsReq uint64 // Number of confirmations before processing a completed segment

	storedSections uint64 // Number of sections successfully indexed into the database
	knownSections  uint64 // Number of sections known to be complete (block wise)
	cascadedHead   uint64 // Block number of the last completed section cascaded to subindexers

	throttling time.Duration // Disk throttling to prevent a heavy upgrade from hogging resources

	log  log.Logger
	lock sync.RWMutex
}

// NewChainIndexer creates a new chain indexer to do background processing on
// chain segments of a given size after a certain number of confirmations have
// passed. The throttling parameter might be used to prevent database thrashing.
func NewChainIndexer(chainDb, indexDb ethdb.Database, backend ChainIndexerBackend, section, confirm uint64, throttling time.Duration, kind string) *ChainIndexer {
	c := &ChainIndexer{
		chainDb:     chainDb,
		indexDb:     indexDb,
		backend:     backend,
		update:      make(chan struct{}, 1),
		quit:        make(chan chan error),
		sectionSize: section,
		confirmsReq: confirm,
		throttling:  throttling,
		log:         log.New("type", kind),
	}
	// Initialize database dependent fields and start the updater
	c.loadValidSections()
	go c.updateLoop()

	return c
}

// AddKnownSectionHead marks a new section head as known/processed if it is newer
// than the already known best section head.
func (c *ChainIndexer) AddKnownSectionHead(section uint64, shead common.Hash) {
	c.lock.Lock()
	defer c.lock.Unlock()

	if section < c.storedSections {
		return
	}
	c.setSectionHead(section, shead)
	c.setValidSections(section + 1)
}

// Start creates a goroutine to feed chain head events into the indexer for
// cascading background processing. Children do not need to be started, they
// are notified about new events by their parents.
func (c *ChainIndexer) Start(chain ChainIndexerChain) {
	events := make(chan ChainEvent, 10)
	sub := chain.SubscribeChainEvent(events)

	go c.eventLoop(chain.CurrentHeader(), events, sub)
}
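// Illustrative usage sketch (not part of the original file): constructing an
// indexer over a hypothetical backend (such as the noopBackend sketched above),
// attaching it to a chain, and shutting it down again. The section size (4096),
// confirmation count (256), throttling interval and the blockchain value are
// assumptions for illustration only.
//
//	indexer := NewChainIndexer(chainDb, indexDb, &noopBackend{}, 4096, 256, 100*time.Millisecond, "example")
//	indexer.Start(blockchain) // blockchain implements ChainIndexerChain
//	defer indexer.Close()
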
// Close tears down all goroutines belonging to the indexer and returns any error
// that might have occurred internally.
func (c *ChainIndexer) Close() error {
	var errs []error

	// Tear down the primary update loop
	errc := make(chan error)
	c.quit <- errc
	if err := <-errc; err != nil {
		errs = append(errs, err)
	}
	// If needed, tear down the secondary event loop
	if atomic.LoadUint32(&c.active) != 0 {
		c.quit <- errc
		if err := <-errc; err != nil {
			errs = append(errs, err)
		}
	}
	// Close all children
	for _, child := range c.children {
		if err := child.Close(); err != nil {
			errs = append(errs, err)
		}
	}
	// Return any failures
	switch {
	case len(errs) == 0:
		return nil

	case len(errs) == 1:
		return errs[0]

	default:
		return fmt.Errorf("%v", errs)
	}
}

// eventLoop is a secondary - optional - event loop of the indexer which is only
// started for the outermost indexer to push chain head events into a processing
// queue.
func (c *ChainIndexer) eventLoop(currentHeader *types.Header, events chan ChainEvent, sub event.Subscription) {
	// Mark the chain indexer as active, requiring an additional teardown
	atomic.StoreUint32(&c.active, 1)

	defer sub.Unsubscribe()

	// Fire the initial new head event to start any outstanding processing
	c.newHead(currentHeader.Number.Uint64(), false)

	var (
		prevHeader = currentHeader
		prevHash   = currentHeader.Hash()
	)
	for {
		select {
		case errc := <-c.quit:
			// Chain indexer terminating, report no failure and abort
			errc <- nil
			return

		case ev, ok := <-events:
			// Received a new event, ensure it's not nil (closing) and update
			if !ok {
				errc := <-c.quit
				errc <- nil
				return
			}
			header := ev.Block.Header()
			if header.ParentHash != prevHash {
				// Reorg to the common ancestor (might not exist in light sync mode, skip reorg then)
				// TODO(karalabe, zsfelfoldi): This seems a bit brittle, can we detect this case explicitly?

				// TODO(karalabe): This operation is expensive and might block, causing the event system to
				// potentially also lock up. We need to do this on a different thread somehow.
				if h := FindCommonAncestor(c.chainDb, prevHeader, header); h != nil {
					c.newHead(h.Number.Uint64(), true)
				}
			}
			c.newHead(header.Number.Uint64(), false)

			prevHeader, prevHash = header, header.Hash()
		}
	}
}
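// An illustrative trace of the reorg branch above (numbers are made up, not
// from the original file): if prevHeader is block #1000 and an event arrives
// whose header's ParentHash does not match prevHash, FindCommonAncestor may
// return, say, block #900; the loop then calls newHead(900, true) to roll back
// any sections past the fork point, followed by newHead with the new head's
// number and reorg == false to schedule indexing on the new branch.
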
// newHead notifies the indexer about new chain heads and/or reorgs.
func (c *ChainIndexer) newHead(head uint64, reorg bool) {
	c.lock.Lock()
	defer c.lock.Unlock()

	// If a reorg happened, invalidate all sections until that point
	if reorg {
		// Revert the known section number to the reorg point
		changed := head / c.sectionSize
		if changed < c.knownSections {
			c.knownSections = changed
		}
		// Revert the stored sections from the database to the reorg point
		if changed < c.storedSections {
			c.setValidSections(changed)
		}
		// Update the new head number to the finalized section end and notify children
		head = changed * c.sectionSize

		if head < c.cascadedHead {
			c.cascadedHead = head
			for _, child := range c.children {
				child.newHead(c.cascadedHead, true)
			}
		}
		return
	}
	// No reorg, calculate the number of newly known sections and update if high enough
	var sections uint64
	if head >= c.confirmsReq {
		sections = (head + 1 - c.confirmsReq) / c.sectionSize
		if sections > c.knownSections {
			c.knownSections = sections

			select {
			case c.update <- struct{}{}:
			default:
			}
		}
	}
}

// updateLoop is the main event loop of the indexer which pushes chain segments
// down into the processing backend.
func (c *ChainIndexer) updateLoop() {
	var (
		updating bool
		updated  time.Time
	)

	for {
		select {
		case errc := <-c.quit:
			// Chain indexer terminating, report no failure and abort
			errc <- nil
			return

		case <-c.update:
			// Section headers completed (or rolled back), update the index
			c.lock.Lock()
			if c.knownSections > c.storedSections {
				// Periodically print an upgrade log message to the user
				if time.Since(updated) > 8*time.Second {
					if c.knownSections > c.storedSections+1 {
						updating = true
						c.log.Info("Upgrading chain index", "percentage", c.storedSections*100/c.knownSections)
					}
					updated = time.Now()
				}
				// Cache the current section count and head to allow unlocking the mutex
				section := c.storedSections
				var oldHead common.Hash
				if section > 0 {
					oldHead = c.SectionHead(section - 1)
				}
				// Process the newly defined section in the background
				c.lock.Unlock()
				newHead, err := c.processSection(section, oldHead)
				if err != nil {
					c.log.Error("Section processing failed", "error", err)
				}
				c.lock.Lock()

				// If processing succeeded and no reorgs occurred, mark the section completed
				if err == nil && oldHead == c.SectionHead(section-1) {
					c.setSectionHead(section, newHead)
					c.setValidSections(section + 1)
					if c.storedSections == c.knownSections && updating {
						updating = false
						c.log.Info("Finished upgrading chain index")
					}

					c.cascadedHead = c.storedSections*c.sectionSize - 1
					for _, child := range c.children {
						c.log.Trace("Cascading chain index update", "head", c.cascadedHead)
						child.newHead(c.cascadedHead, false)
					}
				} else {
					// If processing failed, don't retry until further notification
					c.log.Debug("Chain index processing failed", "section", section, "err", err)
					c.knownSections = c.storedSections
				}
			}
			// If there are still further sections to process, reschedule
			if c.knownSections > c.storedSections {
				time.AfterFunc(c.throttling, func() {
					select {
					case c.update <- struct{}{}:
					default:
					}
				})
			}
			c.lock.Unlock()
		}
	}
}
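// Worked example for the section arithmetic in newHead (illustrative numbers,
// not from the original file): with sectionSize = 4096 and confirmsReq = 256,
// a new head at block 8447 yields
//
//	sections = (8447 + 1 - 256) / 4096 = 8192 / 4096 = 2
//
// i.e. blocks 0-4095 and 4096-8191 form two complete, sufficiently confirmed
// sections, so knownSections becomes 2 and the update channel is signalled.
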
// processSection processes an entire section by calling backend functions while
// ensuring the continuity of the passed headers. Since the chain mutex is not
// held while processing, the continuity can be broken by a long reorg, in which
// case the function returns with an error.
func (c *ChainIndexer) processSection(section uint64, lastHead common.Hash) (common.Hash, error) {
	c.log.Trace("Processing new chain section", "section", section)

	// Reset the backend, aborting any partially completed processing
	if err := c.backend.Reset(section, lastHead); err != nil {
		c.setValidSections(0)
		return common.Hash{}, err
	}

	for number := section * c.sectionSize; number < (section+1)*c.sectionSize; number++ {
		hash := GetCanonicalHash(c.chainDb, number)
		if hash == (common.Hash{}) {
			return common.Hash{}, fmt.Errorf("canonical block #%d unknown", number)
		}
		header := GetHeader(c.chainDb, hash, number)
		if header == nil {
			return common.Hash{}, fmt.Errorf("block #%d [%x…] not found", number, hash[:4])
		} else if header.ParentHash != lastHead {
			return common.Hash{}, fmt.Errorf("chain reorged during section processing")
		}
		c.backend.Process(header)
		lastHead = header.Hash()
	}
	if err := c.backend.Commit(); err != nil {
		c.log.Error("Section commit failed", "error", err)
		return common.Hash{}, err
	}
	return lastHead, nil
}

// Sections returns the number of processed sections maintained by the indexer
// and also the information about the last header indexed for potential canonical
// verifications.
func (c *ChainIndexer) Sections() (uint64, uint64, common.Hash) {
	c.lock.Lock()
	defer c.lock.Unlock()

	return c.storedSections, c.storedSections*c.sectionSize - 1, c.SectionHead(c.storedSections - 1)
}

// AddChildIndexer adds a child ChainIndexer that can use the output of this one.
func (c *ChainIndexer) AddChildIndexer(indexer *ChainIndexer) {
	c.lock.Lock()
	defer c.lock.Unlock()

	c.children = append(c.children, indexer)

	// Cascade any pending updates to new children too
	if c.storedSections > 0 {
		indexer.newHead(c.storedSections*c.sectionSize-1, false)
	}
}

// loadValidSections reads the number of valid sections from the index database
// and caches it into the local state.
func (c *ChainIndexer) loadValidSections() {
	data, _ := c.indexDb.Get([]byte("count"))
	if len(data) == 8 {
		c.storedSections = binary.BigEndian.Uint64(data[:])
	}
}

// setValidSections writes the number of valid sections to the index database.
func (c *ChainIndexer) setValidSections(sections uint64) {
	// Set the current number of valid sections in the database
	var data [8]byte
	binary.BigEndian.PutUint64(data[:], sections)
	c.indexDb.Put([]byte("count"), data[:])

	// Remove any reorged sections, caching the valid ones in the meantime
	for c.storedSections > sections {
		c.storedSections--
		c.removeSectionHead(c.storedSections)
	}
	c.storedSections = sections // needed if new > old
}
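// Illustrative sketch (not part of the original file): chaining a child indexer
// via AddChildIndexer so that it only sees heads the parent has fully indexed.
// The backends, databases and parameters are hypothetical.
//
//	parent := NewChainIndexer(chainDb, parentDb, parentBackend, 4096, 256, 100*time.Millisecond, "parent")
//	child := NewChainIndexer(chainDb, childDb, childBackend, 4096, 256, 100*time.Millisecond, "child")
//	parent.AddChildIndexer(child)
//	parent.Start(blockchain) // only the outermost indexer is started
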
// SectionHead retrieves the last block hash of a processed section from the
// index database.
func (c *ChainIndexer) SectionHead(section uint64) common.Hash {
	var data [8]byte
	binary.BigEndian.PutUint64(data[:], section)

	hash, _ := c.indexDb.Get(append([]byte("shead"), data[:]...))
	if len(hash) == len(common.Hash{}) {
		return common.BytesToHash(hash)
	}
	return common.Hash{}
}

// setSectionHead writes the last block hash of a processed section to the index
// database.
func (c *ChainIndexer) setSectionHead(section uint64, hash common.Hash) {
	var data [8]byte
	binary.BigEndian.PutUint64(data[:], section)

	c.indexDb.Put(append([]byte("shead"), data[:]...), hash.Bytes())
}

// removeSectionHead removes the reference to a processed section from the index
// database.
func (c *ChainIndexer) removeSectionHead(section uint64) {
	var data [8]byte
	binary.BigEndian.PutUint64(data[:], section)

	c.indexDb.Delete(append([]byte("shead"), data[:]...))
}
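// The index database layout used above is: a single "count" key holding the
// number of valid sections as an 8-byte big-endian integer, plus one entry per
// section whose key is "shead" followed by the 8-byte big-endian section number
// and whose value is the 32-byte hash of that section's last block. For example,
// the head of section 5 is stored under the key
//
//	append([]byte("shead"), []byte{0, 0, 0, 0, 0, 0, 0, 5}...)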