github.com/xxRanger/go-ethereum@v1.8.23/swarm/storage/localstore/localstore.go

// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package localstore

import (
	"encoding/binary"
	"encoding/hex"
	"errors"
	"sync"
	"time"

	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/swarm/shed"
	"github.com/ethereum/go-ethereum/swarm/storage"
	"github.com/ethereum/go-ethereum/swarm/storage/mock"
)

var (
	// ErrInvalidMode is returned when an unknown Mode
	// is provided to the function.
	ErrInvalidMode = errors.New("invalid mode")
	// ErrAddressLockTimeout is returned when the same chunk
	// is updated in parallel and one of the updates
	// takes longer than the configured timeout duration.
	ErrAddressLockTimeout = errors.New("address lock timeout")
)

var (
	// Default value for the Capacity DB option.
	defaultCapacity int64 = 5000000
	// Limit the number of goroutines created by Getters
	// that call the updateGC function. Value 0 sets no limit.
	maxParallelUpdateGC = 1000
)
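// The following is an illustrative sketch, not part of the original
// file: it shows the buffered-channel semaphore pattern that the
// updateGCSem field (declared below) relies on. A goroutine acquires a
// slot by sending to the channel and releases it by receiving, so at
// most cap(sem) work functions run concurrently.
func exampleSemaphore(sem chan struct{}, work func()) {
	sem <- struct{}{} // acquire a slot; blocks while the semaphore is full
	go func() {
		defer func() { <-sem }() // release the slot when the work is done
		work()
	}()
}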
// DB is the local store implementation and holds
// database related objects.
type DB struct {
	shed *shed.DB

	// schema name of loaded data
	schemaName shed.StringField
	// field that stores the number of items in the gc index
	storedGCSize shed.Uint64Field

	// retrieval indexes
	retrievalDataIndex   shed.Index
	retrievalAccessIndex shed.Index
	// push syncing index
	pushIndex shed.Index
	// push syncing subscriptions triggers
	pushTriggers   []chan struct{}
	pushTriggersMu sync.RWMutex

	// pull syncing index
	pullIndex shed.Index
	// pull syncing subscriptions triggers per bin
	pullTriggers   map[uint8][]chan struct{}
	pullTriggersMu sync.RWMutex

	// garbage collection index
	gcIndex shed.Index
	// index that stores hashes that are not
	// counted in and saved to storedGCSize
	gcUncountedHashesIndex shed.Index

	// number of elements in the garbage collection index;
	// it must always be read by getGCSize and
	// set with incGCSize, which lock gcSizeMu
	gcSize   int64
	gcSizeMu sync.RWMutex
	// garbage collection is triggered when gcSize exceeds
	// the capacity value
	capacity int64

	// triggers the garbage collection event loop
	collectGarbageTrigger chan struct{}
	// triggers the write gc size event loop
	writeGCSizeTrigger chan struct{}

	// a buffered channel acting as a semaphore
	// to limit the maximal number of goroutines
	// created by Getters to call the updateGC function
	updateGCSem chan struct{}
	// a wait group to ensure all updateGC goroutines
	// are done before closing the database
	updateGCWG sync.WaitGroup

	baseKey []byte

	addressLocks sync.Map

	// this channel is closed when the close function is called
	// to terminate other goroutines
	close chan struct{}
}

// Options struct holds optional parameters for configuring DB.
type Options struct {
	// MockStore is a mock node store that is used to store
	// chunk data in a central store. It can be used to reduce
	// total storage space requirements when testing a large number
	// of swarm nodes with chunk data deduplication provided by
	// the mock global store.
	MockStore *mock.NodeStore
	// Capacity is a limit that triggers garbage collection when
	// the number of items in the gcIndex equals or exceeds it.
	Capacity int64
	// MetricsPrefix defines a prefix for metrics names.
	MetricsPrefix string
}

// New returns a new DB. All fields and indexes are initialized
// and possible conflicts with the schema of an existing database are checked.
// One goroutine for writing batches is created.
func New(path string, baseKey []byte, o *Options) (db *DB, err error) {
	if o == nil {
		o = new(Options)
	}
	db = &DB{
		capacity: o.Capacity,
		baseKey:  baseKey,
		// channels collectGarbageTrigger and writeGCSizeTrigger
		// need to be buffered with the size of 1
		// to signal another event if it
		// is triggered during an already running function
		collectGarbageTrigger: make(chan struct{}, 1),
		writeGCSizeTrigger:    make(chan struct{}, 1),
		close:                 make(chan struct{}),
	}
	if db.capacity <= 0 {
		db.capacity = defaultCapacity
	}
	if maxParallelUpdateGC > 0 {
		db.updateGCSem = make(chan struct{}, maxParallelUpdateGC)
	}

	db.shed, err = shed.NewDB(path, o.MetricsPrefix)
	if err != nil {
		return nil, err
	}
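	// Note (illustrative, not in the original source): shed wraps a
	// single underlying key-value database; the fields and indexes
	// constructed below are all persisted in that one database under
	// distinct key prefixes.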
	// Identify current storage schema by arbitrary name.
	db.schemaName, err = db.shed.NewStringField("schema-name")
	if err != nil {
		return nil, err
	}
	// Persist gc size.
	db.storedGCSize, err = db.shed.NewUint64Field("gc-size")
	if err != nil {
		return nil, err
	}
	// Functions for retrieval data index.
	var (
		encodeValueFunc func(fields shed.Item) (value []byte, err error)
		decodeValueFunc func(keyItem shed.Item, value []byte) (e shed.Item, err error)
	)
	if o.MockStore != nil {
		encodeValueFunc = func(fields shed.Item) (value []byte, err error) {
			b := make([]byte, 8)
			binary.BigEndian.PutUint64(b, uint64(fields.StoreTimestamp))
			err = o.MockStore.Put(fields.Address, fields.Data)
			if err != nil {
				return nil, err
			}
			return b, nil
		}
		decodeValueFunc = func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
			e.StoreTimestamp = int64(binary.BigEndian.Uint64(value[:8]))
			e.Data, err = o.MockStore.Get(keyItem.Address)
			return e, err
		}
	} else {
		encodeValueFunc = func(fields shed.Item) (value []byte, err error) {
			b := make([]byte, 8)
			binary.BigEndian.PutUint64(b, uint64(fields.StoreTimestamp))
			value = append(b, fields.Data...)
			return value, nil
		}
		decodeValueFunc = func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
			e.StoreTimestamp = int64(binary.BigEndian.Uint64(value[:8]))
			e.Data = value[8:]
			return e, nil
		}
	}
	// Index storing actual chunk address, data and store timestamp.
	db.retrievalDataIndex, err = db.shed.NewIndex("Address->StoreTimestamp|Data", shed.IndexFuncs{
		EncodeKey: func(fields shed.Item) (key []byte, err error) {
			return fields.Address, nil
		},
		DecodeKey: func(key []byte) (e shed.Item, err error) {
			e.Address = key
			return e, nil
		},
		EncodeValue: encodeValueFunc,
		DecodeValue: decodeValueFunc,
	})
	if err != nil {
		return nil, err
	}
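	// Note (illustrative, not in the original source): the stored value
	// for the retrieval data index is laid out as
	//
	//   8 byte big endian StoreTimestamp | chunk data
	//
	// which is why both decode variants above read value[:8] as the
	// timestamp. With a MockStore, only the timestamp is kept locally;
	// the chunk data is stored in and fetched from the global mock store.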
	// Index storing access timestamp for a particular address.
	// It is needed in order to update gc index keys for iteration order.
	db.retrievalAccessIndex, err = db.shed.NewIndex("Address->AccessTimestamp", shed.IndexFuncs{
		EncodeKey: func(fields shed.Item) (key []byte, err error) {
			return fields.Address, nil
		},
		DecodeKey: func(key []byte) (e shed.Item, err error) {
			e.Address = key
			return e, nil
		},
		EncodeValue: func(fields shed.Item) (value []byte, err error) {
			b := make([]byte, 8)
			binary.BigEndian.PutUint64(b, uint64(fields.AccessTimestamp))
			return b, nil
		},
		DecodeValue: func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
			e.AccessTimestamp = int64(binary.BigEndian.Uint64(value))
			return e, nil
		},
	})
	if err != nil {
		return nil, err
	}
	// pull index allows history and live syncing per po bin
	db.pullIndex, err = db.shed.NewIndex("PO|StoredTimestamp|Hash->nil", shed.IndexFuncs{
		EncodeKey: func(fields shed.Item) (key []byte, err error) {
			key = make([]byte, 41)
			key[0] = db.po(fields.Address)
			binary.BigEndian.PutUint64(key[1:9], uint64(fields.StoreTimestamp))
			copy(key[9:], fields.Address[:])
			return key, nil
		},
		DecodeKey: func(key []byte) (e shed.Item, err error) {
			e.Address = key[9:]
			e.StoreTimestamp = int64(binary.BigEndian.Uint64(key[1:9]))
			return e, nil
		},
		EncodeValue: func(fields shed.Item) (value []byte, err error) {
			return nil, nil
		},
		DecodeValue: func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
			return e, nil
		},
	})
	if err != nil {
		return nil, err
	}
	// create pull syncing triggers used by the SubscribePull function
	db.pullTriggers = make(map[uint8][]chan struct{})
	// push index contains as yet unsynced chunks
	db.pushIndex, err = db.shed.NewIndex("StoredTimestamp|Hash->nil", shed.IndexFuncs{
		EncodeKey: func(fields shed.Item) (key []byte, err error) {
			key = make([]byte, 40)
			binary.BigEndian.PutUint64(key[:8], uint64(fields.StoreTimestamp))
			copy(key[8:], fields.Address[:])
			return key, nil
		},
		DecodeKey: func(key []byte) (e shed.Item, err error) {
			e.Address = key[8:]
			e.StoreTimestamp = int64(binary.BigEndian.Uint64(key[:8]))
			return e, nil
		},
		EncodeValue: func(fields shed.Item) (value []byte, err error) {
			return nil, nil
		},
		DecodeValue: func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
			return e, nil
		},
	})
	if err != nil {
		return nil, err
	}
	// create push syncing triggers used by the SubscribePush function
	db.pushTriggers = make([]chan struct{}, 0)
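	// Note (illustrative, not in the original source): pull index keys
	// are laid out as
	//
	//   1 byte proximity order | 8 byte big endian StoreTimestamp | 32 byte address
	//
	// while push index keys omit the leading proximity byte. Iterating
	// the pull index therefore yields chunks grouped per po bin in
	// store-time order, and iterating the push index yields all unsynced
	// chunks in store-time order.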
	// gc index for removable chunks ordered by ascending last access time
	db.gcIndex, err = db.shed.NewIndex("AccessTimestamp|StoredTimestamp|Hash->nil", shed.IndexFuncs{
		EncodeKey: func(fields shed.Item) (key []byte, err error) {
			b := make([]byte, 16, 16+len(fields.Address))
			binary.BigEndian.PutUint64(b[:8], uint64(fields.AccessTimestamp))
			binary.BigEndian.PutUint64(b[8:16], uint64(fields.StoreTimestamp))
			key = append(b, fields.Address...)
			return key, nil
		},
		DecodeKey: func(key []byte) (e shed.Item, err error) {
			e.AccessTimestamp = int64(binary.BigEndian.Uint64(key[:8]))
			e.StoreTimestamp = int64(binary.BigEndian.Uint64(key[8:16]))
			e.Address = key[16:]
			return e, nil
		},
		EncodeValue: func(fields shed.Item) (value []byte, err error) {
			return nil, nil
		},
		DecodeValue: func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
			return e, nil
		},
	})
	if err != nil {
		return nil, err
	}
	// gc uncounted hashes index keeps hashes that are in the gc index
	// but not yet counted in and saved to storedGCSize
	db.gcUncountedHashesIndex, err = db.shed.NewIndex("Hash->nil", shed.IndexFuncs{
		EncodeKey: func(fields shed.Item) (key []byte, err error) {
			return fields.Address, nil
		},
		DecodeKey: func(key []byte) (e shed.Item, err error) {
			e.Address = key
			return e, nil
		},
		EncodeValue: func(fields shed.Item) (value []byte, err error) {
			return nil, nil
		},
		DecodeValue: func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
			return e, nil
		},
	})
	if err != nil {
		return nil, err
	}

	// count the number of elements in the garbage collection index
	gcSize, err := db.storedGCSize.Get()
	if err != nil {
		return nil, err
	}
	// get the number of uncounted hashes
	gcUncountedSize, err := db.gcUncountedHashesIndex.Count()
	if err != nil {
		return nil, err
	}
	gcSize += uint64(gcUncountedSize)
	// remove uncounted hashes from the index and
	// save the total gcSize after uncounted hashes are removed
	err = db.writeGCSize(int64(gcSize))
	if err != nil {
		return nil, err
	}
	db.incGCSize(int64(gcSize))

	// start the worker that writes gc size
	go db.writeGCSizeWorker()
	// start the garbage collection worker
	go db.collectGarbageWorker()
	return db, nil
}

// Close closes the underlying database.
func (db *DB) Close() (err error) {
	close(db.close)
	db.updateGCWG.Wait()
	if err := db.writeGCSize(db.getGCSize()); err != nil {
		log.Error("localstore: write gc size", "err", err)
	}
	return db.shed.Close()
}

// po computes the proximity order between the address
// and the database base key.
func (db *DB) po(addr storage.Address) (bin uint8) {
	return uint8(storage.Proximity(db.baseKey, addr))
}

var (
	// Maximal time for lockAddr to wait until it
	// returns an error.
	addressLockTimeout = 3 * time.Second
	// duration between two lock checks in lockAddr.
	addressLockCheckDelay = 30 * time.Microsecond
)

// lockAddr sets the lock on a particular address
// using the addressLocks sync.Map and returns an unlock function.
// If the address is locked, this function will check it
// in a loop for addressLockTimeout time, after which
// it will return the ErrAddressLockTimeout error.
func (db *DB) lockAddr(addr storage.Address) (unlock func(), err error) {
	start := time.Now()
	lockKey := hex.EncodeToString(addr)
	for {
		_, loaded := db.addressLocks.LoadOrStore(lockKey, struct{}{})
		if !loaded {
			break
		}
		time.Sleep(addressLockCheckDelay)
		if time.Since(start) > addressLockTimeout {
			return nil, ErrAddressLockTimeout
		}
	}
	return func() { db.addressLocks.Delete(lockKey) }, nil
}
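// Illustrative usage sketch, not part of the original file. It shows
// how a DB is typically opened, how an address is locked for the
// duration of an update, and how the DB is closed. The function name,
// path and capacity values are arbitrary examples.
func exampleUsage(path string, baseKey []byte, addr storage.Address) error {
	// open the store with a custom garbage collection capacity
	db, err := New(path, baseKey, &Options{Capacity: 1000000})
	if err != nil {
		return err
	}
	defer db.Close()

	// serialize updates to the same chunk address
	unlock, err := db.lockAddr(addr)
	if err != nil {
		// ErrAddressLockTimeout is returned after addressLockTimeout
		return err
	}
	defer unlock()

	// ... update the chunk stored under addr ...
	return nil
}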
// chunkToItem creates a new Item with data provided by the Chunk.
func chunkToItem(ch storage.Chunk) shed.Item {
	return shed.Item{
		Address: ch.Address(),
		Data:    ch.Data(),
	}
}

// addressToItem creates a new Item with the provided address.
func addressToItem(addr storage.Address) shed.Item {
	return shed.Item{
		Address: addr,
	}
}

// now is a helper function that returns the current unix timestamp
// in the UTC timezone.
// It is set in the init function for use in production, and
// optionally overridden in tests for data validation.
var now func() int64

func init() {
	// set the now function
	now = func() (t int64) {
		return time.Now().UTC().UnixNano()
	}
}
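// Illustrative sketch, not part of the original file: building a
// shed.Item from a chunk and stamping it with the current timestamp
// via the package level now function, in the way store operations in
// this package timestamp newly stored chunks.
func exampleItemFromChunk(ch storage.Chunk) shed.Item {
	item := chunkToItem(ch)
	item.StoreTimestamp = now() // tests may override now for validation
	return item
}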