github.com/SmartMeshFoundation/Spectrum@v0.0.0-20220621030607-452a266fee1e/swarm/storage/dpa.go (about) 1 // Copyright 2016 The Spectrum Authors 2 // This file is part of the Spectrum library. 3 // 4 // The Spectrum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The Spectrum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the Spectrum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package storage 18 19 import ( 20 "errors" 21 "fmt" 22 "io" 23 "sync" 24 "time" 25 26 "github.com/SmartMeshFoundation/Spectrum/log" 27 ) 28 29 /* 30 DPA provides the client API entrypoints Store and Retrieve to store and retrieve 31 It can store anything that has a byte slice representation, so files or serialised objects etc. 32 33 Storage: DPA calls the Chunker to segment the input datastream of any size to a merkle hashed tree of chunks. The key of the root block is returned to the client. 34 35 Retrieval: given the key of the root block, the DPA retrieves the block chunks and reconstructs the original data and passes it back as a lazy reader. A lazy reader is a reader with on-demand delayed processing, i.e. the chunks needed to reconstruct a large file are only fetched and processed if that particular part of the document is actually read. 36 37 As the chunker produces chunks, DPA dispatches them to its own chunk store 38 implementation for storage or retrieval. 39 */ 40 41 const ( 42 storeChanCapacity = 100 43 retrieveChanCapacity = 100 44 singletonSwarmDbCapacity = 50000 45 singletonSwarmCacheCapacity = 500 46 maxStoreProcesses = 8 47 maxRetrieveProcesses = 8 48 ) 49 50 var ( 51 notFound = errors.New("not found") 52 ) 53 54 type DPA struct { 55 ChunkStore 56 storeC chan *Chunk 57 retrieveC chan *Chunk 58 Chunker Chunker 59 60 lock sync.Mutex 61 running bool 62 quitC chan bool 63 } 64 65 // for testing locally 66 func NewLocalDPA(datadir string) (*DPA, error) { 67 68 hash := MakeHashFunc("SHA256") 69 70 dbStore, err := NewDbStore(datadir, hash, singletonSwarmDbCapacity, 0) 71 if err != nil { 72 return nil, err 73 } 74 75 return NewDPA(&LocalStore{ 76 NewMemStore(dbStore, singletonSwarmCacheCapacity), 77 dbStore, 78 }, NewChunkerParams()), nil 79 } 80 81 func NewDPA(store ChunkStore, params *ChunkerParams) *DPA { 82 chunker := NewTreeChunker(params) 83 return &DPA{ 84 Chunker: chunker, 85 ChunkStore: store, 86 } 87 } 88 89 // Public API. Main entry point for document retrieval directly. Used by the 90 // FS-aware API and httpaccess 91 // Chunk retrieval blocks on netStore requests with a timeout so reader will 92 // report error if retrieval of chunks within requested range time out. 93 func (self *DPA) Retrieve(key Key) LazySectionReader { 94 return self.Chunker.Join(key, self.retrieveC) 95 } 96 97 // Public API. Main entry point for document storage directly. Used by the 98 // FS-aware API and httpaccess 99 func (self *DPA) Store(data io.Reader, size int64, swg *sync.WaitGroup, wwg *sync.WaitGroup) (key Key, err error) { 100 return self.Chunker.Split(data, size, self.storeC, swg, wwg) 101 } 102 103 func (self *DPA) Start() { 104 self.lock.Lock() 105 defer self.lock.Unlock() 106 if self.running { 107 return 108 } 109 self.running = true 110 self.retrieveC = make(chan *Chunk, retrieveChanCapacity) 111 self.storeC = make(chan *Chunk, storeChanCapacity) 112 self.quitC = make(chan bool) 113 self.storeLoop() 114 self.retrieveLoop() 115 } 116 117 func (self *DPA) Stop() { 118 self.lock.Lock() 119 defer self.lock.Unlock() 120 if !self.running { 121 return 122 } 123 self.running = false 124 close(self.quitC) 125 } 126 127 // retrieveLoop dispatches the parallel chunk retrieval requests received on the 128 // retrieve channel to its ChunkStore (NetStore or LocalStore) 129 func (self *DPA) retrieveLoop() { 130 for i := 0; i < maxRetrieveProcesses; i++ { 131 go self.retrieveWorker() 132 } 133 log.Trace(fmt.Sprintf("dpa: retrieve loop spawning %v workers", maxRetrieveProcesses)) 134 } 135 136 func (self *DPA) retrieveWorker() { 137 for chunk := range self.retrieveC { 138 log.Trace(fmt.Sprintf("dpa: retrieve loop : chunk %v", chunk.Key.Log())) 139 storedChunk, err := self.Get(chunk.Key) 140 if err == notFound { 141 log.Trace(fmt.Sprintf("chunk %v not found", chunk.Key.Log())) 142 } else if err != nil { 143 log.Trace(fmt.Sprintf("error retrieving chunk %v: %v", chunk.Key.Log(), err)) 144 } else { 145 chunk.SData = storedChunk.SData 146 chunk.Size = storedChunk.Size 147 } 148 close(chunk.C) 149 150 select { 151 case <-self.quitC: 152 return 153 default: 154 } 155 } 156 } 157 158 // storeLoop dispatches the parallel chunk store request processors 159 // received on the store channel to its ChunkStore (NetStore or LocalStore) 160 func (self *DPA) storeLoop() { 161 for i := 0; i < maxStoreProcesses; i++ { 162 go self.storeWorker() 163 } 164 log.Trace(fmt.Sprintf("dpa: store spawning %v workers", maxStoreProcesses)) 165 } 166 167 func (self *DPA) storeWorker() { 168 169 for chunk := range self.storeC { 170 self.Put(chunk) 171 if chunk.wg != nil { 172 log.Trace(fmt.Sprintf("dpa: store processor %v", chunk.Key.Log())) 173 chunk.wg.Done() 174 175 } 176 select { 177 case <-self.quitC: 178 return 179 default: 180 } 181 } 182 } 183 184 // DpaChunkStore implements the ChunkStore interface, 185 // this chunk access layer assumed 2 chunk stores 186 // local storage eg. LocalStore and network storage eg., NetStore 187 // access by calling network is blocking with a timeout 188 189 type dpaChunkStore struct { 190 n int 191 localStore ChunkStore 192 netStore ChunkStore 193 } 194 195 func NewDpaChunkStore(localStore, netStore ChunkStore) *dpaChunkStore { 196 return &dpaChunkStore{0, localStore, netStore} 197 } 198 199 // Get is the entrypoint for local retrieve requests 200 // waits for response or times out 201 func (self *dpaChunkStore) Get(key Key) (chunk *Chunk, err error) { 202 chunk, err = self.netStore.Get(key) 203 // timeout := time.Now().Add(searchTimeout) 204 if chunk.SData != nil { 205 log.Trace(fmt.Sprintf("DPA.Get: %v found locally, %d bytes", key.Log(), len(chunk.SData))) 206 return 207 } 208 // TODO: use self.timer time.Timer and reset with defer disableTimer 209 timer := time.After(searchTimeout) 210 select { 211 case <-timer: 212 log.Trace(fmt.Sprintf("DPA.Get: %v request time out ", key.Log())) 213 err = notFound 214 case <-chunk.Req.C: 215 log.Trace(fmt.Sprintf("DPA.Get: %v retrieved, %d bytes (%p)", key.Log(), len(chunk.SData), chunk)) 216 } 217 return 218 } 219 220 // Put is the entrypoint for local store requests coming from storeLoop 221 func (self *dpaChunkStore) Put(entry *Chunk) { 222 chunk, err := self.localStore.Get(entry.Key) 223 if err != nil { 224 log.Trace(fmt.Sprintf("DPA.Put: %v new chunk. call netStore.Put", entry.Key.Log())) 225 chunk = entry 226 } else if chunk.SData == nil { 227 log.Trace(fmt.Sprintf("DPA.Put: %v request entry found", entry.Key.Log())) 228 chunk.SData = entry.SData 229 chunk.Size = entry.Size 230 } else { 231 log.Trace(fmt.Sprintf("DPA.Put: %v chunk already known", entry.Key.Log())) 232 return 233 } 234 // from this point on the storage logic is the same with network storage requests 235 log.Trace(fmt.Sprintf("DPA.Put %v: %v", self.n, chunk.Key.Log())) 236 self.n++ 237 self.netStore.Put(chunk) 238 } 239 240 // Close chunk store 241 func (self *dpaChunkStore) Close() {}