github.com/SmartMeshFoundation/Spectrum@v0.0.0-20220621030607-452a266fee1e/swarm/storage/dpa.go (about)

     1  // Copyright 2016 The Spectrum Authors
     2  // This file is part of the Spectrum library.
     3  //
     4  // The Spectrum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The Spectrum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the Spectrum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package storage
    18  
    19  import (
    20  	"errors"
    21  	"fmt"
    22  	"io"
    23  	"sync"
    24  	"time"
    25  
    26  	"github.com/SmartMeshFoundation/Spectrum/log"
    27  )
    28  
    29  /*
DPA provides the client API entrypoints Store and Retrieve to store and retrieve documents.
It can store anything that has a byte slice representation, such as files or serialised objects.
    32  
    33  Storage: DPA calls the Chunker to segment the input datastream of any size to a merkle hashed tree of chunks. The key of the root block is returned to the client.
    34  
    35  Retrieval: given the key of the root block, the DPA retrieves the block chunks and reconstructs the original data and passes it back as a lazy reader. A lazy reader is a reader with on-demand delayed processing, i.e. the chunks needed to reconstruct a large file are only fetched and processed if that particular part of the document is actually read.
    36  
    37  As the chunker produces chunks, DPA dispatches them to its own chunk store
    38  implementation for storage or retrieval.
    39  */
    40  
    41  const (
    42  	storeChanCapacity           = 100
    43  	retrieveChanCapacity        = 100
    44  	singletonSwarmDbCapacity    = 50000
    45  	singletonSwarmCacheCapacity = 500
    46  	maxStoreProcesses           = 8
    47  	maxRetrieveProcesses        = 8
    48  )
    49  
    50  var (
    51  	notFound = errors.New("not found")
    52  )
    53  
    54  type DPA struct {
    55  	ChunkStore
    56  	storeC    chan *Chunk
    57  	retrieveC chan *Chunk
    58  	Chunker   Chunker
    59  
    60  	lock    sync.Mutex
    61  	running bool
    62  	quitC   chan bool
    63  }
    64  
    65  // for testing locally
    66  func NewLocalDPA(datadir string) (*DPA, error) {
    67  
    68  	hash := MakeHashFunc("SHA256")
    69  
    70  	dbStore, err := NewDbStore(datadir, hash, singletonSwarmDbCapacity, 0)
    71  	if err != nil {
    72  		return nil, err
    73  	}
    74  
    75  	return NewDPA(&LocalStore{
    76  		NewMemStore(dbStore, singletonSwarmCacheCapacity),
    77  		dbStore,
    78  	}, NewChunkerParams()), nil
    79  }
    80  
    81  func NewDPA(store ChunkStore, params *ChunkerParams) *DPA {
    82  	chunker := NewTreeChunker(params)
    83  	return &DPA{
    84  		Chunker:    chunker,
    85  		ChunkStore: store,
    86  	}
    87  }
    88  
    89  // Public API. Main entry point for document retrieval directly. Used by the
    90  // FS-aware API and httpaccess
    91  // Chunk retrieval blocks on netStore requests with a timeout so reader will
    92  // report error if retrieval of chunks within requested range time out.
    93  func (self *DPA) Retrieve(key Key) LazySectionReader {
    94  	return self.Chunker.Join(key, self.retrieveC)
    95  }
    96  
    97  // Public API. Main entry point for document storage directly. Used by the
    98  // FS-aware API and httpaccess
    99  func (self *DPA) Store(data io.Reader, size int64, swg *sync.WaitGroup, wwg *sync.WaitGroup) (key Key, err error) {
   100  	return self.Chunker.Split(data, size, self.storeC, swg, wwg)
   101  }
   102  
   103  func (self *DPA) Start() {
   104  	self.lock.Lock()
   105  	defer self.lock.Unlock()
   106  	if self.running {
   107  		return
   108  	}
   109  	self.running = true
   110  	self.retrieveC = make(chan *Chunk, retrieveChanCapacity)
   111  	self.storeC = make(chan *Chunk, storeChanCapacity)
   112  	self.quitC = make(chan bool)
   113  	self.storeLoop()
   114  	self.retrieveLoop()
   115  }
   116  
   117  func (self *DPA) Stop() {
   118  	self.lock.Lock()
   119  	defer self.lock.Unlock()
   120  	if !self.running {
   121  		return
   122  	}
   123  	self.running = false
   124  	close(self.quitC)
   125  }
   126  
   127  // retrieveLoop dispatches the parallel chunk retrieval requests received on the
   128  // retrieve channel to its ChunkStore  (NetStore or LocalStore)
   129  func (self *DPA) retrieveLoop() {
   130  	for i := 0; i < maxRetrieveProcesses; i++ {
   131  		go self.retrieveWorker()
   132  	}
   133  	log.Trace(fmt.Sprintf("dpa: retrieve loop spawning %v workers", maxRetrieveProcesses))
   134  }
   135  
   136  func (self *DPA) retrieveWorker() {
   137  	for chunk := range self.retrieveC {
   138  		log.Trace(fmt.Sprintf("dpa: retrieve loop : chunk %v", chunk.Key.Log()))
   139  		storedChunk, err := self.Get(chunk.Key)
   140  		if err == notFound {
   141  			log.Trace(fmt.Sprintf("chunk %v not found", chunk.Key.Log()))
   142  		} else if err != nil {
   143  			log.Trace(fmt.Sprintf("error retrieving chunk %v: %v", chunk.Key.Log(), err))
   144  		} else {
   145  			chunk.SData = storedChunk.SData
   146  			chunk.Size = storedChunk.Size
   147  		}
   148  		close(chunk.C)
   149  
   150  		select {
   151  		case <-self.quitC:
   152  			return
   153  		default:
   154  		}
   155  	}
   156  }
   157  
   158  // storeLoop dispatches the parallel chunk store request processors
   159  // received on the store channel to its ChunkStore (NetStore or LocalStore)
   160  func (self *DPA) storeLoop() {
   161  	for i := 0; i < maxStoreProcesses; i++ {
   162  		go self.storeWorker()
   163  	}
   164  	log.Trace(fmt.Sprintf("dpa: store spawning %v workers", maxStoreProcesses))
   165  }
   166  
   167  func (self *DPA) storeWorker() {
   168  
   169  	for chunk := range self.storeC {
   170  		self.Put(chunk)
   171  		if chunk.wg != nil {
   172  			log.Trace(fmt.Sprintf("dpa: store processor %v", chunk.Key.Log()))
   173  			chunk.wg.Done()
   174  
   175  		}
   176  		select {
   177  		case <-self.quitC:
   178  			return
   179  		default:
   180  		}
   181  	}
   182  }
   183  
   184  // DpaChunkStore implements the ChunkStore interface,
   185  // this chunk access layer assumed 2 chunk stores
   186  // local storage eg. LocalStore and network storage eg., NetStore
   187  // access by calling network is blocking with a timeout
   188  
   189  type dpaChunkStore struct {
   190  	n          int
   191  	localStore ChunkStore
   192  	netStore   ChunkStore
   193  }
   194  
   195  func NewDpaChunkStore(localStore, netStore ChunkStore) *dpaChunkStore {
   196  	return &dpaChunkStore{0, localStore, netStore}
   197  }
   198  
   199  // Get is the entrypoint for local retrieve requests
   200  // waits for response or times out
   201  func (self *dpaChunkStore) Get(key Key) (chunk *Chunk, err error) {
   202  	chunk, err = self.netStore.Get(key)
   203  	// timeout := time.Now().Add(searchTimeout)
   204  	if chunk.SData != nil {
   205  		log.Trace(fmt.Sprintf("DPA.Get: %v found locally, %d bytes", key.Log(), len(chunk.SData)))
   206  		return
   207  	}
   208  	// TODO: use self.timer time.Timer and reset with defer disableTimer
   209  	timer := time.After(searchTimeout)
   210  	select {
   211  	case <-timer:
   212  		log.Trace(fmt.Sprintf("DPA.Get: %v request time out ", key.Log()))
   213  		err = notFound
   214  	case <-chunk.Req.C:
   215  		log.Trace(fmt.Sprintf("DPA.Get: %v retrieved, %d bytes (%p)", key.Log(), len(chunk.SData), chunk))
   216  	}
   217  	return
   218  }
   219  
   220  // Put is the entrypoint for local store requests coming from storeLoop
   221  func (self *dpaChunkStore) Put(entry *Chunk) {
   222  	chunk, err := self.localStore.Get(entry.Key)
   223  	if err != nil {
   224  		log.Trace(fmt.Sprintf("DPA.Put: %v new chunk. call netStore.Put", entry.Key.Log()))
   225  		chunk = entry
   226  	} else if chunk.SData == nil {
   227  		log.Trace(fmt.Sprintf("DPA.Put: %v request entry found", entry.Key.Log()))
   228  		chunk.SData = entry.SData
   229  		chunk.Size = entry.Size
   230  	} else {
   231  		log.Trace(fmt.Sprintf("DPA.Put: %v chunk already known", entry.Key.Log()))
   232  		return
   233  	}
   234  	// from this point on the storage logic is the same with network storage requests
   235  	log.Trace(fmt.Sprintf("DPA.Put %v: %v", self.n, chunk.Key.Log()))
   236  	self.n++
   237  	self.netStore.Put(chunk)
   238  }
   239  
   240  // Close chunk store
   241  func (self *dpaChunkStore) Close() {}