github.com/aychain/blockbook@v0.1.1-0.20181121092459-6d1fc7e07c5b/db/sync.go (about) 1 package db 2 3 import ( 4 "blockbook/bchain" 5 "blockbook/common" 6 "os" 7 "sync" 8 "sync/atomic" 9 "time" 10 11 "github.com/golang/glog" 12 "github.com/juju/errors" 13 ) 14 15 // SyncWorker is handle to SyncWorker 16 type SyncWorker struct { 17 db *RocksDB 18 chain bchain.BlockChain 19 syncWorkers, syncChunk int 20 dryRun bool 21 startHeight uint32 22 startHash string 23 chanOsSignal chan os.Signal 24 metrics *common.Metrics 25 is *common.InternalState 26 } 27 28 // NewSyncWorker creates new SyncWorker and returns its handle 29 func NewSyncWorker(db *RocksDB, chain bchain.BlockChain, syncWorkers, syncChunk int, minStartHeight int, dryRun bool, chanOsSignal chan os.Signal, metrics *common.Metrics, is *common.InternalState) (*SyncWorker, error) { 30 if minStartHeight < 0 { 31 minStartHeight = 0 32 } 33 return &SyncWorker{ 34 db: db, 35 chain: chain, 36 syncWorkers: syncWorkers, 37 syncChunk: syncChunk, 38 dryRun: dryRun, 39 startHeight: uint32(minStartHeight), 40 chanOsSignal: chanOsSignal, 41 metrics: metrics, 42 is: is, 43 }, nil 44 } 45 46 var errSynced = errors.New("synced") 47 48 // ResyncIndex synchronizes index to the top of the blockchain 49 // onNewBlock is called when new block is connected, but not in initial parallel sync 50 func (w *SyncWorker) ResyncIndex(onNewBlock bchain.OnNewBlockFunc, initialSync bool) error { 51 start := time.Now() 52 w.is.StartedSync() 53 54 err := w.resyncIndex(onNewBlock, initialSync) 55 56 switch err { 57 case nil: 58 d := time.Since(start) 59 glog.Info("resync: finished in ", d) 60 w.metrics.IndexResyncDuration.Observe(float64(d) / 1e6) // in milliseconds 61 w.metrics.IndexDBSize.Set(float64(w.db.DatabaseSizeOnDisk())) 62 bh, _, err := w.db.GetBestBlock() 63 if err == nil { 64 w.is.FinishedSync(bh) 65 } 66 return nil 67 case errSynced: 68 // this is not actually error but flag that resync wasn't necessary 69 w.is.FinishedSyncNoChange() 70 w.metrics.IndexDBSize.Set(float64(w.db.DatabaseSizeOnDisk())) 71 return nil 72 } 73 74 w.metrics.IndexResyncErrors.With(common.Labels{"error": err.Error()}).Inc() 75 76 return err 77 } 78 79 func (w *SyncWorker) resyncIndex(onNewBlock bchain.OnNewBlockFunc, initialSync bool) error { 80 remoteBestHash, err := w.chain.GetBestBlockHash() 81 if err != nil { 82 return err 83 } 84 localBestHeight, localBestHash, err := w.db.GetBestBlock() 85 if err != nil { 86 return err 87 } 88 // If the locally indexed block is the same as the best block on the network, we're done. 89 if localBestHash == remoteBestHash { 90 glog.Infof("resync: synced at %d %s", localBestHeight, localBestHash) 91 return errSynced 92 } 93 if localBestHash != "" { 94 remoteHash, err := w.chain.GetBlockHash(localBestHeight) 95 // for some coins (eth) remote can be at lower best height after rollback 96 if err != nil && err != bchain.ErrBlockNotFound { 97 return err 98 } 99 if remoteHash != localBestHash { 100 // forked - the remote hash differs from the local hash at the same height 101 glog.Info("resync: local is forked at height ", localBestHeight, ", local hash ", localBestHash, ", remote hash", remoteHash) 102 return w.handleFork(localBestHeight, localBestHash, onNewBlock, initialSync) 103 } 104 glog.Info("resync: local at ", localBestHeight, " is behind") 105 w.startHeight = localBestHeight + 1 106 } else { 107 // database is empty, start genesis 108 glog.Info("resync: genesis from block ", w.startHeight) 109 } 110 w.startHash, err = w.chain.GetBlockHash(w.startHeight) 111 if err != nil { 112 return err 113 } 114 // if parallel operation is enabled and the number of blocks to be connected is large, 115 // use parallel routine to load majority of blocks 116 if w.syncWorkers > 1 { 117 remoteBestHeight, err := w.chain.GetBestBlockHeight() 118 if err != nil { 119 return err 120 } 121 if remoteBestHeight < w.startHeight { 122 glog.Error("resync: error - remote best height ", remoteBestHeight, " less than sync start height ", w.startHeight) 123 return errors.New("resync: remote best height error") 124 } 125 if remoteBestHeight-w.startHeight > uint32(w.syncChunk) { 126 glog.Infof("resync: parallel sync of blocks %d-%d, using %d workers", w.startHeight, remoteBestHeight, w.syncWorkers) 127 err = w.ConnectBlocksParallel(w.startHeight, remoteBestHeight) 128 if err != nil { 129 return err 130 } 131 // after parallel load finish the sync using standard way, 132 // new blocks may have been created in the meantime 133 return w.resyncIndex(onNewBlock, initialSync) 134 } 135 } 136 return w.connectBlocks(onNewBlock, initialSync) 137 } 138 139 func (w *SyncWorker) handleFork(localBestHeight uint32, localBestHash string, onNewBlock bchain.OnNewBlockFunc, initialSync bool) error { 140 // find forked blocks, disconnect them and then synchronize again 141 var height uint32 142 hashes := []string{localBestHash} 143 for height = localBestHeight - 1; height >= 0; height-- { 144 local, err := w.db.GetBlockHash(height) 145 if err != nil { 146 return err 147 } 148 if local == "" { 149 break 150 } 151 remote, err := w.chain.GetBlockHash(height) 152 // for some coins (eth) remote can be at lower best height after rollback 153 if err != nil && err != bchain.ErrBlockNotFound { 154 return err 155 } 156 if local == remote { 157 break 158 } 159 hashes = append(hashes, local) 160 } 161 if err := w.DisconnectBlocks(height+1, localBestHeight, hashes); err != nil { 162 return err 163 } 164 return w.resyncIndex(onNewBlock, initialSync) 165 } 166 167 func (w *SyncWorker) connectBlocks(onNewBlock bchain.OnNewBlockFunc, initialSync bool) error { 168 bch := make(chan blockResult, 8) 169 done := make(chan struct{}) 170 defer close(done) 171 172 go w.getBlockChain(bch, done) 173 174 var lastRes, empty blockResult 175 176 connect := func(res blockResult) error { 177 lastRes = res 178 if res.err != nil { 179 return res.err 180 } 181 err := w.db.ConnectBlock(res.block) 182 if err != nil { 183 return err 184 } 185 if onNewBlock != nil { 186 onNewBlock(res.block.Hash, res.block.Height) 187 } 188 if res.block.Height > 0 && res.block.Height%1000 == 0 { 189 glog.Info("connected block ", res.block.Height, " ", res.block.Hash) 190 } 191 192 return nil 193 } 194 195 if initialSync { 196 ConnectLoop: 197 for { 198 select { 199 case <-w.chanOsSignal: 200 return errors.Errorf("connectBlocks interrupted at height %d", lastRes.block.Height) 201 case res := <-bch: 202 if res == empty { 203 break ConnectLoop 204 } 205 err := connect(res) 206 if err != nil { 207 return err 208 } 209 } 210 } 211 } else { 212 // while regular sync, OS sig is handled by waitForSignalAndShutdown 213 for res := range bch { 214 err := connect(res) 215 if err != nil { 216 return err 217 } 218 } 219 } 220 221 if lastRes.block != nil { 222 glog.Infof("resync: synced at %d %s", lastRes.block.Height, lastRes.block.Hash) 223 } 224 225 return nil 226 } 227 228 // ConnectBlocksParallel uses parallel goroutines to get data from blockchain daemon 229 func (w *SyncWorker) ConnectBlocksParallel(lower, higher uint32) error { 230 type hashHeight struct { 231 hash string 232 height uint32 233 } 234 var err error 235 var wg sync.WaitGroup 236 bch := make([]chan *bchain.Block, w.syncWorkers) 237 for i := 0; i < w.syncWorkers; i++ { 238 bch[i] = make(chan *bchain.Block) 239 } 240 hch := make(chan hashHeight, w.syncWorkers) 241 hchClosed := atomic.Value{} 242 hchClosed.Store(false) 243 writeBlockDone := make(chan struct{}) 244 terminating := make(chan struct{}) 245 writeBlockWorker := func() { 246 defer close(writeBlockDone) 247 bc, err := w.db.InitBulkConnect() 248 if err != nil { 249 glog.Error("sync: InitBulkConnect error ", err) 250 } 251 lastBlock := lower - 1 252 keep := uint32(w.chain.GetChainParser().KeepBlockAddresses()) 253 WriteBlockLoop: 254 for { 255 select { 256 case b := <-bch[(lastBlock+1)%uint32(w.syncWorkers)]: 257 if b == nil { 258 // channel is closed and empty - work is done 259 break WriteBlockLoop 260 } 261 if b.Height != lastBlock+1 { 262 glog.Fatal("writeBlockWorker skipped block, expected block ", lastBlock+1, ", new block ", b.Height) 263 } 264 err := bc.ConnectBlock(b, b.Height+keep > higher) 265 if err != nil { 266 glog.Fatal("writeBlockWorker ", b.Height, " ", b.Hash, " error ", err) 267 } 268 lastBlock = b.Height 269 case <-terminating: 270 break WriteBlockLoop 271 } 272 } 273 err = bc.Close() 274 if err != nil { 275 glog.Error("sync: bulkconnect.Close error ", err) 276 } 277 glog.Info("WriteBlock exiting...") 278 } 279 getBlockWorker := func(i int) { 280 defer wg.Done() 281 var err error 282 var block *bchain.Block 283 GetBlockLoop: 284 for hh := range hch { 285 for { 286 block, err = w.chain.GetBlock(hh.hash, hh.height) 287 if err != nil { 288 // signal came while looping in the error loop 289 if hchClosed.Load() == true { 290 glog.Error("getBlockWorker ", i, " connect block error ", err, ". Exiting...") 291 return 292 } 293 glog.Error("getBlockWorker ", i, " connect block error ", err, ". Retrying...") 294 w.metrics.IndexResyncErrors.With(common.Labels{"error": err.Error()}).Inc() 295 time.Sleep(time.Millisecond * 500) 296 } else { 297 break 298 } 299 } 300 if w.dryRun { 301 continue 302 } 303 select { 304 case bch[hh.height%uint32(w.syncWorkers)] <- block: 305 case <-terminating: 306 break GetBlockLoop 307 } 308 } 309 glog.Info("getBlockWorker ", i, " exiting...") 310 } 311 for i := 0; i < w.syncWorkers; i++ { 312 wg.Add(1) 313 go getBlockWorker(i) 314 } 315 go writeBlockWorker() 316 var hash string 317 start := time.Now() 318 msTime := time.Now().Add(1 * time.Minute) 319 ConnectLoop: 320 for h := lower; h <= higher; { 321 select { 322 case <-w.chanOsSignal: 323 err = errors.Errorf("connectBlocksParallel interrupted at height %d", h) 324 // signal all workers to terminate their loops (error loops are interrupted below) 325 close(terminating) 326 break ConnectLoop 327 default: 328 hash, err = w.chain.GetBlockHash(h) 329 if err != nil { 330 glog.Error("GetBlockHash error ", err) 331 w.metrics.IndexResyncErrors.With(common.Labels{"error": err.Error()}).Inc() 332 time.Sleep(time.Millisecond * 500) 333 continue 334 } 335 hch <- hashHeight{hash, h} 336 if h > 0 && h%1000 == 0 { 337 glog.Info("connecting block ", h, " ", hash, ", elapsed ", time.Since(start), " ", w.db.GetAndResetConnectBlockStats()) 338 start = time.Now() 339 } 340 if msTime.Before(time.Now()) { 341 glog.Info(w.db.GetMemoryStats()) 342 w.metrics.IndexDBSize.Set(float64(w.db.DatabaseSizeOnDisk())) 343 msTime = time.Now().Add(10 * time.Minute) 344 } 345 h++ 346 } 347 } 348 close(hch) 349 // signal stop to workers that are in a error loop 350 hchClosed.Store(true) 351 // wait for workers and close bch that will stop writer loop 352 wg.Wait() 353 for i := 0; i < w.syncWorkers; i++ { 354 close(bch[i]) 355 } 356 <-writeBlockDone 357 return err 358 } 359 360 type blockResult struct { 361 block *bchain.Block 362 err error 363 } 364 365 func (w *SyncWorker) getBlockChain(out chan blockResult, done chan struct{}) { 366 defer close(out) 367 368 hash := w.startHash 369 height := w.startHeight 370 371 // some coins do not return Next hash 372 // must loop until error 373 for { 374 select { 375 case <-done: 376 return 377 default: 378 } 379 block, err := w.chain.GetBlock(hash, height) 380 if err != nil { 381 if err == bchain.ErrBlockNotFound { 382 break 383 } 384 out <- blockResult{err: err} 385 return 386 } 387 hash = block.Next 388 height++ 389 out <- blockResult{block: block} 390 } 391 } 392 393 // DisconnectBlocks removes all data belonging to blocks in range lower-higher, 394 func (w *SyncWorker) DisconnectBlocks(lower uint32, higher uint32, hashes []string) error { 395 glog.Infof("sync: disconnecting blocks %d-%d", lower, higher) 396 // if the chain is UTXO, always use DisconnectBlockRange 397 if w.chain.GetChainParser().IsUTXOChain() { 398 return w.db.DisconnectBlockRangeUTXO(lower, higher) 399 } 400 blocks := make([]*bchain.Block, len(hashes)) 401 var err error 402 // try to get all blocks first to see if we can avoid full scan 403 for i, hash := range hashes { 404 blocks[i], err = w.chain.GetBlock(hash, 0) 405 if err != nil { 406 // cannot get a block, we must do full range scan 407 return w.db.DisconnectBlockRangeNonUTXO(lower, higher) 408 } 409 } 410 // got all blocks to be disconnected, disconnect them one after another 411 for i, block := range blocks { 412 glog.Info("Disconnecting block ", (int(higher) - i), " ", block.Hash) 413 if err = w.db.DisconnectBlock(block); err != nil { 414 return err 415 } 416 } 417 return nil 418 }