github.com/alanchchen/go-ethereum@v1.6.6-0.20170601190819-6171d01b1195/swarm/storage/dpa.go

// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package storage

import (
	"errors"
	"fmt"
	"io"
	"sync"
	"time"

	"github.com/ethereum/go-ethereum/log"
)

/*
DPA provides the client API entrypoints Store and Retrieve to store and retrieve data.
It can store anything that has a byte slice representation, such as files or serialised objects.

Storage: DPA calls the Chunker to segment the input datastream of any size into a merkle hashed tree of chunks. The key of the root block is returned to the client.

Retrieval: given the key of the root block, the DPA retrieves the block chunks and reconstructs the original data, passing it back as a lazy reader. A lazy reader is a reader with on-demand delayed processing, i.e. the chunks needed to reconstruct a large file are only fetched and processed if that particular part of the document is actually read.

As the chunker produces chunks, DPA dispatches them to its own chunk store
implementation for storage or retrieval.
*/

const (
	storeChanCapacity           = 100
	retrieveChanCapacity        = 100
	singletonSwarmDbCapacity    = 50000
	singletonSwarmCacheCapacity = 500
	maxStoreProcesses           = 8
	maxRetrieveProcesses        = 8
)

var (
	notFound = errors.New("not found")
)

type DPA struct {
	ChunkStore
	storeC    chan *Chunk
	retrieveC chan *Chunk
	Chunker   Chunker

	lock    sync.Mutex
	running bool
	wg      *sync.WaitGroup
	quitC   chan bool
}

// NewLocalDPA is a convenience constructor for testing locally: it backs the
// DPA with a LocalStore, a memory cache in front of a database store under datadir.
func NewLocalDPA(datadir string) (*DPA, error) {

	hash := MakeHashFunc("SHA256")

	dbStore, err := NewDbStore(datadir, hash, singletonSwarmDbCapacity, 0)
	if err != nil {
		return nil, err
	}

	return NewDPA(&LocalStore{
		NewMemStore(dbStore, singletonSwarmCacheCapacity),
		dbStore,
	}, NewChunkerParams()), nil
}

func NewDPA(store ChunkStore, params *ChunkerParams) *DPA {
	chunker := NewTreeChunker(params)
	return &DPA{
		Chunker:    chunker,
		ChunkStore: store,
	}
}

// Retrieve is public API: the main entry point for direct document retrieval,
// used by the FS-aware API and httpaccess.
// Chunk retrieval blocks on netStore requests with a timeout, so the reader
// reports an error if retrieval of chunks within the requested range times out.
func (self *DPA) Retrieve(key Key) LazySectionReader {
	return self.Chunker.Join(key, self.retrieveC)
}
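// Illustrative sketch (not part of the original file): reading a byte range
// through the lazy reader returned by Retrieve. It assumes dpa.Start has been
// called, and that the LazySectionReader defined in chunker.go satisfies
// io.ReaderAt (an assumption here), so only the chunks covering the requested
// range are fetched. readRange is a hypothetical helper name.
func readRange(dpa *DPA, key Key, off int64, buf []byte) (int, error) {
	reader := dpa.Retrieve(key)
	// ReadAt triggers retrieval of just the chunks needed to fill buf.
	return reader.ReadAt(buf, off)
}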
// Store is public API: the main entry point for direct document storage,
// used by the FS-aware API and httpaccess.
func (self *DPA) Store(data io.Reader, size int64, swg *sync.WaitGroup, wwg *sync.WaitGroup) (key Key, err error) {
	return self.Chunker.Split(data, size, self.storeC, swg, wwg)
}

func (self *DPA) Start() {
	self.lock.Lock()
	defer self.lock.Unlock()
	if self.running {
		return
	}
	self.running = true
	self.retrieveC = make(chan *Chunk, retrieveChanCapacity)
	self.storeC = make(chan *Chunk, storeChanCapacity)
	self.quitC = make(chan bool)
	self.storeLoop()
	self.retrieveLoop()
}

func (self *DPA) Stop() {
	self.lock.Lock()
	defer self.lock.Unlock()
	if !self.running {
		return
	}
	self.running = false
	close(self.quitC)
}

// retrieveLoop dispatches the parallel chunk retrieval requests received on
// the retrieve channel to its ChunkStore (NetStore or LocalStore)
func (self *DPA) retrieveLoop() {
	for i := 0; i < maxRetrieveProcesses; i++ {
		go self.retrieveWorker()
	}
	log.Trace(fmt.Sprintf("dpa: retrieve loop spawning %v workers", maxRetrieveProcesses))
}

func (self *DPA) retrieveWorker() {
	for chunk := range self.retrieveC {
		log.Trace(fmt.Sprintf("dpa: retrieve loop : chunk %v", chunk.Key.Log()))
		storedChunk, err := self.Get(chunk.Key)
		if err == notFound {
			log.Trace(fmt.Sprintf("chunk %v not found", chunk.Key.Log()))
		} else if err != nil {
			log.Trace(fmt.Sprintf("error retrieving chunk %v: %v", chunk.Key.Log(), err))
		} else {
			chunk.SData = storedChunk.SData
			chunk.Size = storedChunk.Size
		}
		close(chunk.C)

		select {
		case <-self.quitC:
			return
		default:
		}
	}
}

// storeLoop spawns the parallel workers that process chunk store requests
// received on the store channel and dispatch them to its ChunkStore (NetStore
// or LocalStore)
func (self *DPA) storeLoop() {
	for i := 0; i < maxStoreProcesses; i++ {
		go self.storeWorker()
	}
	log.Trace(fmt.Sprintf("dpa: store spawning %v workers", maxStoreProcesses))
}

func (self *DPA) storeWorker() {
	for chunk := range self.storeC {
		self.Put(chunk)
		if chunk.wg != nil {
			log.Trace(fmt.Sprintf("dpa: store processor %v", chunk.Key.Log()))
			chunk.wg.Done()
		}
		select {
		case <-self.quitC:
			return
		default:
		}
	}
}
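// Illustrative sketch (not part of the original file): storing a stream and
// blocking until every chunk has reached the ChunkStore. It assumes dpa.Start
// has been called, that the chunker increments swg once per chunk (matching
// the chunk.wg.Done call in storeWorker above), and that a nil writer
// WaitGroup is accepted by the chunker. storeAndWait is a hypothetical
// helper name.
func storeAndWait(dpa *DPA, data io.Reader, size int64) (Key, error) {
	swg := &sync.WaitGroup{}
	key, err := dpa.Store(data, size, swg, nil)
	if err != nil {
		return nil, err
	}
	swg.Wait() // returns once the store workers have drained all chunks
	return key, nil
}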
// dpaChunkStore implements the ChunkStore interface. This chunk access layer
// assumes two chunk stores: local storage (e.g. LocalStore) and network
// storage (e.g. NetStore). Access via the network store blocks with a timeout.

type dpaChunkStore struct {
	n          int
	localStore ChunkStore
	netStore   ChunkStore
}

func NewDpaChunkStore(localStore, netStore ChunkStore) *dpaChunkStore {
	return &dpaChunkStore{0, localStore, netStore}
}

// Get is the entrypoint for local retrieve requests. It waits for a response
// or times out.
func (self *dpaChunkStore) Get(key Key) (chunk *Chunk, err error) {
	chunk, err = self.netStore.Get(key)
	if chunk.SData != nil {
		log.Trace(fmt.Sprintf("DPA.Get: %v found locally, %d bytes", key.Log(), len(chunk.SData)))
		return
	}
	// TODO: use self.timer time.Timer and reset with defer disableTimer
	timer := time.After(searchTimeout)
	select {
	case <-timer:
		log.Trace(fmt.Sprintf("DPA.Get: %v request time out ", key.Log()))
		err = notFound
	case <-chunk.Req.C:
		log.Trace(fmt.Sprintf("DPA.Get: %v retrieved, %d bytes (%p)", key.Log(), len(chunk.SData), chunk))
	}
	return
}

// Put is the entrypoint for local store requests coming from storeLoop
func (self *dpaChunkStore) Put(entry *Chunk) {
	chunk, err := self.localStore.Get(entry.Key)
	if err != nil {
		log.Trace(fmt.Sprintf("DPA.Put: %v new chunk. call netStore.Put", entry.Key.Log()))
		chunk = entry
	} else if chunk.SData == nil {
		log.Trace(fmt.Sprintf("DPA.Put: %v request entry found", entry.Key.Log()))
		chunk.SData = entry.SData
		chunk.Size = entry.Size
	} else {
		log.Trace(fmt.Sprintf("DPA.Put: %v chunk already known", entry.Key.Log()))
		return
	}
	// from this point on the storage logic is the same as for network storage requests
	log.Trace(fmt.Sprintf("DPA.Put %v: %v", self.n, chunk.Key.Log()))
	self.n++
	self.netStore.Put(chunk)
}

// Close is a no-op: dpaChunkStore holds no resources of its own.
func (self *dpaChunkStore) Close() {}
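// Illustrative sketch (not part of the original file): wiring a DPA on top of
// the two-tier dpaChunkStore. localStore and netStore stand for any two
// ChunkStore implementations (e.g. LocalStore and NetStore); newTwoTierDPA is
// a hypothetical helper name.
func newTwoTierDPA(localStore, netStore ChunkStore) *DPA {
	store := NewDpaChunkStore(localStore, netStore)
	dpa := NewDPA(store, NewChunkerParams())
	dpa.Start() // spawn the store and retrieve worker pools
	return dpa
}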