github.com/jincm/wesharechain@v0.0.0-20210122032815-1537409ce26a/chain/swarm/storage/localstore/localstore.go

// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package localstore

import (
	"encoding/binary"
	"encoding/hex"
	"errors"
	"sync"
	"time"

	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/swarm/chunk"
	"github.com/ethereum/go-ethereum/swarm/shed"
	"github.com/ethereum/go-ethereum/swarm/storage/mock"
)

var (
	// ErrInvalidMode is returned when an unknown Mode
	// is provided to the function.
	ErrInvalidMode = errors.New("invalid mode")
	// ErrAddressLockTimeout is returned when the same chunk
	// is updated in parallel and one of the updates
	// takes longer than the configured timeout duration.
	ErrAddressLockTimeout = errors.New("address lock timeout")
)

var (
	// Default value for the Capacity DB option.
	defaultCapacity int64 = 5000000
	// Limit the number of goroutines created by Getters
	// that call the updateGC function. Value 0 sets no limit.
	maxParallelUpdateGC = 1000
)
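
// A note on sizing (an estimate, not a guarantee from this package): with
// the usual 4096 byte Swarm chunk data size, the default capacity of
// 5,000,000 chunks corresponds to roughly 5,000,000 * 4096 B ≈ 20.5 GB of
// stored chunk data, plus index overhead.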

// DB is the local store implementation and holds
// database related objects.
type DB struct {
	shed *shed.DB

	// schema name of loaded data
	schemaName shed.StringField
	// field that stores the number of items in the gc index
	storedGCSize shed.Uint64Field

	// retrieval indexes
	retrievalDataIndex   shed.Index
	retrievalAccessIndex shed.Index
	// push syncing index
	pushIndex shed.Index
	// push syncing subscriptions triggers
	pushTriggers   []chan struct{}
	pushTriggersMu sync.RWMutex

	// pull syncing index
	pullIndex shed.Index
	// pull syncing subscriptions triggers per bin
	pullTriggers   map[uint8][]chan struct{}
	pullTriggersMu sync.RWMutex

	// garbage collection index
	gcIndex shed.Index
	// index that stores hashes that are not
	// counted in and saved to storedGCSize
	gcUncountedHashesIndex shed.Index

	// number of elements in the garbage collection index;
	// it must always be read with getGCSize and
	// set with incGCSize, which lock gcSizeMu
	gcSize   int64
	gcSizeMu sync.RWMutex
	// garbage collection is triggered when gcSize exceeds
	// the capacity value
	capacity int64

	// triggers the garbage collection event loop
	collectGarbageTrigger chan struct{}
	// triggers the write gc size event loop
	writeGCSizeTrigger chan struct{}

	// a buffered channel acting as a semaphore
	// to limit the maximal number of goroutines
	// created by Getters to call the updateGC function
	updateGCSem chan struct{}
	// a wait group to ensure all updateGC goroutines
	// are done before closing the database
	updateGCWG sync.WaitGroup

	baseKey []byte

	addressLocks sync.Map

	// this channel is closed when the Close function is called
	// to terminate other goroutines
	close chan struct{}

	// protect the Close method from exiting before
	// the garbage collection and gc size write workers
	// are done
	collectGarbageWorkerDone chan struct{}
	writeGCSizeWorkerDone    chan struct{}
}

// Options struct holds optional parameters for configuring DB.
type Options struct {
	// MockStore is a mock node store that is used to store
	// chunk data in a central store. It can be used to reduce
	// total storage space requirements when testing a large number
	// of swarm nodes with chunk data deduplication provided by
	// the mock global store.
	MockStore *mock.NodeStore
	// Capacity is a limit that triggers garbage collection when
	// the number of items in gcIndex equals or exceeds it.
	Capacity int64
	// MetricsPrefix defines a prefix for metrics names.
	MetricsPrefix string
}
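
// A minimal usage sketch (the path, key and capacity below are illustrative,
// not values required by this package): open a store and close it when done.
//
//	baseKey := make([]byte, 32) // typically the node's overlay address
//	db, err := New("/var/lib/swarm/localstore", baseKey, &Options{
//		Capacity: 1000000,
//	})
//	if err != nil {
//		// handle error
//	}
//	defer db.Close()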

// New returns a new DB. All fields and indexes are initialized
// and possible conflicts with the schema of an existing database are checked.
// One goroutine for writing batches is created.
func New(path string, baseKey []byte, o *Options) (db *DB, err error) {
	if o == nil {
		o = new(Options)
	}
	db = &DB{
		capacity: o.Capacity,
		baseKey:  baseKey,
		// channels collectGarbageTrigger and writeGCSizeTrigger
		// need to be buffered with size 1
		// to signal another event if one is triggered
		// while the function is already running
		collectGarbageTrigger:    make(chan struct{}, 1),
		writeGCSizeTrigger:       make(chan struct{}, 1),
		close:                    make(chan struct{}),
		collectGarbageWorkerDone: make(chan struct{}),
		writeGCSizeWorkerDone:    make(chan struct{}),
	}
	if db.capacity <= 0 {
		db.capacity = defaultCapacity
	}
	if maxParallelUpdateGC > 0 {
		db.updateGCSem = make(chan struct{}, maxParallelUpdateGC)
	}

	db.shed, err = shed.NewDB(path, o.MetricsPrefix)
	if err != nil {
		return nil, err
	}
	// Identify current storage schema by an arbitrary name.
	db.schemaName, err = db.shed.NewStringField("schema-name")
	if err != nil {
		return nil, err
	}
	// Persist gc size.
	db.storedGCSize, err = db.shed.NewUint64Field("gc-size")
	if err != nil {
		return nil, err
	}
	// Functions for the retrieval data index.
	var (
		encodeValueFunc func(fields shed.Item) (value []byte, err error)
		decodeValueFunc func(keyItem shed.Item, value []byte) (e shed.Item, err error)
	)
	if o.MockStore != nil {
		encodeValueFunc = func(fields shed.Item) (value []byte, err error) {
			b := make([]byte, 8)
			binary.BigEndian.PutUint64(b, uint64(fields.StoreTimestamp))
			err = o.MockStore.Put(fields.Address, fields.Data)
			if err != nil {
				return nil, err
			}
			return b, nil
		}
		decodeValueFunc = func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
			e.StoreTimestamp = int64(binary.BigEndian.Uint64(value[:8]))
			e.Data, err = o.MockStore.Get(keyItem.Address)
			return e, err
		}
	} else {
		encodeValueFunc = func(fields shed.Item) (value []byte, err error) {
			b := make([]byte, 8)
			binary.BigEndian.PutUint64(b, uint64(fields.StoreTimestamp))
			value = append(b, fields.Data...)
			return value, nil
		}
		decodeValueFunc = func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
			e.StoreTimestamp = int64(binary.BigEndian.Uint64(value[:8]))
			e.Data = value[8:]
			return e, nil
		}
	}
	// Index storing actual chunk address, data and store timestamp.
	db.retrievalDataIndex, err = db.shed.NewIndex("Address->StoreTimestamp|Data", shed.IndexFuncs{
		EncodeKey: func(fields shed.Item) (key []byte, err error) {
			return fields.Address, nil
		},
		DecodeKey: func(key []byte) (e shed.Item, err error) {
			e.Address = key
			return e, nil
		},
		EncodeValue: encodeValueFunc,
		DecodeValue: decodeValueFunc,
	})
	if err != nil {
		return nil, err
	}
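
	// The value layout produced by encodeValueFunc above is, byte for byte:
	//
	//	0..7   StoreTimestamp (uint64, big endian)
	//	8..    chunk data (omitted when a MockStore holds the data,
	//	       in which case decodeValueFunc fetches it by address)
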
	// Index storing the access timestamp for a particular address.
	// It is needed in order to update gc index keys for iteration order.
	db.retrievalAccessIndex, err = db.shed.NewIndex("Address->AccessTimestamp", shed.IndexFuncs{
		EncodeKey: func(fields shed.Item) (key []byte, err error) {
			return fields.Address, nil
		},
		DecodeKey: func(key []byte) (e shed.Item, err error) {
			e.Address = key
			return e, nil
		},
		EncodeValue: func(fields shed.Item) (value []byte, err error) {
			b := make([]byte, 8)
			binary.BigEndian.PutUint64(b, uint64(fields.AccessTimestamp))
			return b, nil
		},
		DecodeValue: func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
			e.AccessTimestamp = int64(binary.BigEndian.Uint64(value))
			return e, nil
		},
	})
	if err != nil {
		return nil, err
	}
	// pull index allows history and live syncing per po bin
	db.pullIndex, err = db.shed.NewIndex("PO|StoredTimestamp|Hash->nil", shed.IndexFuncs{
		EncodeKey: func(fields shed.Item) (key []byte, err error) {
			key = make([]byte, 41)
			key[0] = db.po(fields.Address)
			binary.BigEndian.PutUint64(key[1:9], uint64(fields.StoreTimestamp))
			copy(key[9:], fields.Address[:])
			return key, nil
		},
		DecodeKey: func(key []byte) (e shed.Item, err error) {
			e.Address = key[9:]
			e.StoreTimestamp = int64(binary.BigEndian.Uint64(key[1:9]))
			return e, nil
		},
		EncodeValue: func(fields shed.Item) (value []byte, err error) {
			return nil, nil
		},
		DecodeValue: func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
			return e, nil
		},
	})
	if err != nil {
		return nil, err
	}
	// create pull syncing triggers used by the SubscribePull function
	db.pullTriggers = make(map[uint8][]chan struct{})
	// push index contains as yet unsynced chunks
	db.pushIndex, err = db.shed.NewIndex("StoredTimestamp|Hash->nil", shed.IndexFuncs{
		EncodeKey: func(fields shed.Item) (key []byte, err error) {
			key = make([]byte, 40)
			binary.BigEndian.PutUint64(key[:8], uint64(fields.StoreTimestamp))
			copy(key[8:], fields.Address[:])
			return key, nil
		},
		DecodeKey: func(key []byte) (e shed.Item, err error) {
			e.Address = key[8:]
			e.StoreTimestamp = int64(binary.BigEndian.Uint64(key[:8]))
			return e, nil
		},
		EncodeValue: func(fields shed.Item) (value []byte, err error) {
			return nil, nil
		},
		DecodeValue: func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
			return e, nil
		},
	})
	if err != nil {
		return nil, err
	}
	// create push syncing triggers used by the SubscribePush function
	db.pushTriggers = make([]chan struct{}, 0)
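
	// Both syncing indexes above share one key shape: an 8 byte big endian
	// StoreTimestamp followed by the 32 byte chunk address, with the pull
	// index prefixing one extra byte for the proximity order bin (41 byte
	// keys versus 40). Iterating either index therefore visits chunks in
	// the order they were stored: per bin for pull syncing, globally for
	// push syncing.
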
	// gc index for removable chunks ordered by ascending last access time
	db.gcIndex, err = db.shed.NewIndex("AccessTimestamp|StoredTimestamp|Hash->nil", shed.IndexFuncs{
		EncodeKey: func(fields shed.Item) (key []byte, err error) {
			b := make([]byte, 16, 16+len(fields.Address))
			binary.BigEndian.PutUint64(b[:8], uint64(fields.AccessTimestamp))
			binary.BigEndian.PutUint64(b[8:16], uint64(fields.StoreTimestamp))
			key = append(b, fields.Address...)
			return key, nil
		},
		DecodeKey: func(key []byte) (e shed.Item, err error) {
			e.AccessTimestamp = int64(binary.BigEndian.Uint64(key[:8]))
			e.StoreTimestamp = int64(binary.BigEndian.Uint64(key[8:16]))
			e.Address = key[16:]
			return e, nil
		},
		EncodeValue: func(fields shed.Item) (value []byte, err error) {
			return nil, nil
		},
		DecodeValue: func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
			return e, nil
		},
	})
	if err != nil {
		return nil, err
	}
	// gc uncounted hashes index keeps hashes that are in the gc index
	// but not yet counted in and saved to storedGCSize
	db.gcUncountedHashesIndex, err = db.shed.NewIndex("Hash->nil", shed.IndexFuncs{
		EncodeKey: func(fields shed.Item) (key []byte, err error) {
			return fields.Address, nil
		},
		DecodeKey: func(key []byte) (e shed.Item, err error) {
			e.Address = key
			return e, nil
		},
		EncodeValue: func(fields shed.Item) (value []byte, err error) {
			return nil, nil
		},
		DecodeValue: func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
			return e, nil
		},
	})
	if err != nil {
		return nil, err
	}

	// count the number of elements in the garbage collection index
	gcSize, err := db.storedGCSize.Get()
	if err != nil {
		return nil, err
	}
	// get the number of uncounted hashes
	gcUncountedSize, err := db.gcUncountedHashesIndex.Count()
	if err != nil {
		return nil, err
	}
	gcSize += uint64(gcUncountedSize)
	// remove uncounted hashes from the index and
	// save the total gcSize after uncounted hashes are removed
	err = db.writeGCSize(int64(gcSize))
	if err != nil {
		return nil, err
	}
	db.incGCSize(int64(gcSize))

	// start the worker that writes the gc size
	go db.writeGCSizeWorker()
	// start the garbage collection worker
	go db.collectGarbageWorker()
	return db, nil
}

// Close closes the underlying database.
func (db *DB) Close() (err error) {
	close(db.close)
	db.updateGCWG.Wait()

	// wait for the gc worker and the gc size write worker to
	// return before closing the shed
	timeout := time.After(5 * time.Second)
	select {
	case <-db.collectGarbageWorkerDone:
	case <-timeout:
		log.Error("localstore: collect garbage worker did not return after db close")
	}
	select {
	case <-db.writeGCSizeWorkerDone:
	case <-timeout:
		log.Error("localstore: write gc size worker did not return after db close")
	}

	if err := db.writeGCSize(db.getGCSize()); err != nil {
		log.Error("localstore: write gc size", "err", err)
	}
	return db.shed.Close()
}

// po computes the proximity order between the address
// and the database base key.
func (db *DB) po(addr chunk.Address) (bin uint8) {
	return uint8(chunk.Proximity(db.baseKey, addr))
}

var (
	// Maximal time for lockAddr to wait until it
	// returns an error.
	addressLockTimeout = 3 * time.Second
	// duration between two lock checks in lockAddr.
	addressLockCheckDelay = 30 * time.Microsecond
)
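
// lockAddr below is effectively a per-address spinlock: LoadOrStore on the
// addressLocks sync.Map either claims the key or reports it as already held,
// in which case the caller sleeps addressLockCheckDelay and retries until
// addressLockTimeout elapses. A typical call site (illustrative, not taken
// from this file) looks like:
//
//	unlock, err := db.lockAddr(ch.Address())
//	if err != nil {
//		return err
//	}
//	defer unlock()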

// lockAddr sets the lock on a particular address
// using the addressLocks sync.Map and returns an unlock function.
// If the address is locked, this function will check it
// in a for loop for addressLockTimeout time, after which
// it will return the ErrAddressLockTimeout error.
func (db *DB) lockAddr(addr chunk.Address) (unlock func(), err error) {
	start := time.Now()
	lockKey := hex.EncodeToString(addr)
	for {
		_, loaded := db.addressLocks.LoadOrStore(lockKey, struct{}{})
		if !loaded {
			break
		}
		time.Sleep(addressLockCheckDelay)
		if time.Since(start) > addressLockTimeout {
			return nil, ErrAddressLockTimeout
		}
	}
	return func() { db.addressLocks.Delete(lockKey) }, nil
}

// chunkToItem creates a new Item with data provided by the Chunk.
func chunkToItem(ch chunk.Chunk) shed.Item {
	return shed.Item{
		Address: ch.Address(),
		Data:    ch.Data(),
	}
}

// addressToItem creates a new Item with the provided address.
func addressToItem(addr chunk.Address) shed.Item {
	return shed.Item{
		Address: addr,
	}
}

// now is a helper function that returns the current unix timestamp
// in the UTC timezone.
// It is set in the init function for usage in production, and
// optionally overridden in tests for data validation.
var now func() int64

func init() {
	// set the now function
	now = func() (t int64) {
		return time.Now().UTC().UnixNano()
	}
}
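
// Tests elsewhere in this package can swap the now variable to make
// timestamps deterministic; a minimal sketch of that pattern (hypothetical
// test, not part of this file):
//
//	func TestSomething(t *testing.T) {
//		defer func(prev func() int64) { now = prev }(now)
//		now = func() int64 { return 42 } // fixed timestamp for assertions
//		// ... exercise the DB and assert on stored timestamps ...
//	}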