github.com/sberex/go-sberex@v1.8.2-0.20181113200658-ed96ac38f7d7/swarm/storage/dpa.go (about) 1 // This file is part of the go-sberex library. The go-sberex library is 2 // free software: you can redistribute it and/or modify it under the terms 3 // of the GNU Lesser General Public License as published by the Free 4 // Software Foundation, either version 3 of the License, or (at your option) 5 // any later version. 6 // 7 // The go-sberex library is distributed in the hope that it will be useful, 8 // but WITHOUT ANY WARRANTY; without even the implied warranty of 9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser 10 // General Public License <http://www.gnu.org/licenses/> for more details. 11 12 package storage 13 14 import ( 15 "errors" 16 "fmt" 17 "io" 18 "sync" 19 "time" 20 21 "github.com/Sberex/go-sberex/log" 22 ) 23 24 /* 25 DPA provides the client API entrypoints Store and Retrieve to store and retrieve 26 It can store anything that has a byte slice representation, so files or serialised objects etc. 27 28 Storage: DPA calls the Chunker to segment the input datastream of any size to a merkle hashed tree of chunks. The key of the root block is returned to the client. 29 30 Retrieval: given the key of the root block, the DPA retrieves the block chunks and reconstructs the original data and passes it back as a lazy reader. A lazy reader is a reader with on-demand delayed processing, i.e. the chunks needed to reconstruct a large file are only fetched and processed if that particular part of the document is actually read. 31 32 As the chunker produces chunks, DPA dispatches them to its own chunk store 33 implementation for storage or retrieval. 34 */ 35 36 const ( 37 storeChanCapacity = 100 38 retrieveChanCapacity = 100 39 singletonSwarmDbCapacity = 50000 40 singletonSwarmCacheCapacity = 500 41 maxStoreProcesses = 8 42 maxRetrieveProcesses = 8 43 ) 44 45 var ( 46 notFound = errors.New("not found") 47 ) 48 49 type DPA struct { 50 ChunkStore 51 storeC chan *Chunk 52 retrieveC chan *Chunk 53 Chunker Chunker 54 55 lock sync.Mutex 56 running bool 57 quitC chan bool 58 } 59 60 // for testing locally 61 func NewLocalDPA(datadir string) (*DPA, error) { 62 63 hash := MakeHashFunc("SHA256") 64 65 dbStore, err := NewDbStore(datadir, hash, singletonSwarmDbCapacity, 0) 66 if err != nil { 67 return nil, err 68 } 69 70 return NewDPA(&LocalStore{ 71 NewMemStore(dbStore, singletonSwarmCacheCapacity), 72 dbStore, 73 }, NewChunkerParams()), nil 74 } 75 76 func NewDPA(store ChunkStore, params *ChunkerParams) *DPA { 77 chunker := NewTreeChunker(params) 78 return &DPA{ 79 Chunker: chunker, 80 ChunkStore: store, 81 } 82 } 83 84 // Public API. Main entry point for document retrieval directly. Used by the 85 // FS-aware API and httpaccess 86 // Chunk retrieval blocks on netStore requests with a timeout so reader will 87 // report error if retrieval of chunks within requested range time out. 88 func (self *DPA) Retrieve(key Key) LazySectionReader { 89 return self.Chunker.Join(key, self.retrieveC) 90 } 91 92 // Public API. Main entry point for document storage directly. Used by the 93 // FS-aware API and httpaccess 94 func (self *DPA) Store(data io.Reader, size int64, swg *sync.WaitGroup, wwg *sync.WaitGroup) (key Key, err error) { 95 return self.Chunker.Split(data, size, self.storeC, swg, wwg) 96 } 97 98 func (self *DPA) Start() { 99 self.lock.Lock() 100 defer self.lock.Unlock() 101 if self.running { 102 return 103 } 104 self.running = true 105 self.retrieveC = make(chan *Chunk, retrieveChanCapacity) 106 self.storeC = make(chan *Chunk, storeChanCapacity) 107 self.quitC = make(chan bool) 108 self.storeLoop() 109 self.retrieveLoop() 110 } 111 112 func (self *DPA) Stop() { 113 self.lock.Lock() 114 defer self.lock.Unlock() 115 if !self.running { 116 return 117 } 118 self.running = false 119 close(self.quitC) 120 } 121 122 // retrieveLoop dispatches the parallel chunk retrieval requests received on the 123 // retrieve channel to its ChunkStore (NetStore or LocalStore) 124 func (self *DPA) retrieveLoop() { 125 for i := 0; i < maxRetrieveProcesses; i++ { 126 go self.retrieveWorker() 127 } 128 log.Trace(fmt.Sprintf("dpa: retrieve loop spawning %v workers", maxRetrieveProcesses)) 129 } 130 131 func (self *DPA) retrieveWorker() { 132 for chunk := range self.retrieveC { 133 log.Trace(fmt.Sprintf("dpa: retrieve loop : chunk %v", chunk.Key.Log())) 134 storedChunk, err := self.Get(chunk.Key) 135 if err == notFound { 136 log.Trace(fmt.Sprintf("chunk %v not found", chunk.Key.Log())) 137 } else if err != nil { 138 log.Trace(fmt.Sprintf("error retrieving chunk %v: %v", chunk.Key.Log(), err)) 139 } else { 140 chunk.SData = storedChunk.SData 141 chunk.Size = storedChunk.Size 142 } 143 close(chunk.C) 144 145 select { 146 case <-self.quitC: 147 return 148 default: 149 } 150 } 151 } 152 153 // storeLoop dispatches the parallel chunk store request processors 154 // received on the store channel to its ChunkStore (NetStore or LocalStore) 155 func (self *DPA) storeLoop() { 156 for i := 0; i < maxStoreProcesses; i++ { 157 go self.storeWorker() 158 } 159 log.Trace(fmt.Sprintf("dpa: store spawning %v workers", maxStoreProcesses)) 160 } 161 162 func (self *DPA) storeWorker() { 163 164 for chunk := range self.storeC { 165 self.Put(chunk) 166 if chunk.wg != nil { 167 log.Trace(fmt.Sprintf("dpa: store processor %v", chunk.Key.Log())) 168 chunk.wg.Done() 169 170 } 171 select { 172 case <-self.quitC: 173 return 174 default: 175 } 176 } 177 } 178 179 // DpaChunkStore implements the ChunkStore interface, 180 // this chunk access layer assumed 2 chunk stores 181 // local storage eg. LocalStore and network storage eg., NetStore 182 // access by calling network is blocking with a timeout 183 184 type dpaChunkStore struct { 185 n int 186 localStore ChunkStore 187 netStore ChunkStore 188 } 189 190 func NewDpaChunkStore(localStore, netStore ChunkStore) *dpaChunkStore { 191 return &dpaChunkStore{0, localStore, netStore} 192 } 193 194 // Get is the entrypoint for local retrieve requests 195 // waits for response or times out 196 func (self *dpaChunkStore) Get(key Key) (chunk *Chunk, err error) { 197 chunk, err = self.netStore.Get(key) 198 // timeout := time.Now().Add(searchTimeout) 199 if chunk.SData != nil { 200 log.Trace(fmt.Sprintf("DPA.Get: %v found locally, %d bytes", key.Log(), len(chunk.SData))) 201 return 202 } 203 // TODO: use self.timer time.Timer and reset with defer disableTimer 204 timer := time.After(searchTimeout) 205 select { 206 case <-timer: 207 log.Trace(fmt.Sprintf("DPA.Get: %v request time out ", key.Log())) 208 err = notFound 209 case <-chunk.Req.C: 210 log.Trace(fmt.Sprintf("DPA.Get: %v retrieved, %d bytes (%p)", key.Log(), len(chunk.SData), chunk)) 211 } 212 return 213 } 214 215 // Put is the entrypoint for local store requests coming from storeLoop 216 func (self *dpaChunkStore) Put(entry *Chunk) { 217 chunk, err := self.localStore.Get(entry.Key) 218 if err != nil { 219 log.Trace(fmt.Sprintf("DPA.Put: %v new chunk. call netStore.Put", entry.Key.Log())) 220 chunk = entry 221 } else if chunk.SData == nil { 222 log.Trace(fmt.Sprintf("DPA.Put: %v request entry found", entry.Key.Log())) 223 chunk.SData = entry.SData 224 chunk.Size = entry.Size 225 } else { 226 log.Trace(fmt.Sprintf("DPA.Put: %v chunk already known", entry.Key.Log())) 227 return 228 } 229 // from this point on the storage logic is the same with network storage requests 230 log.Trace(fmt.Sprintf("DPA.Put %v: %v", self.n, chunk.Key.Log())) 231 self.n++ 232 self.netStore.Put(chunk) 233 } 234 235 // Close chunk store 236 func (self *dpaChunkStore) Close() {}