github.com/sberex/go-sberex@v1.8.2-0.20181113200658-ed96ac38f7d7/swarm/storage/dpa.go

// This file is part of the go-sberex library. The go-sberex library is
// free software: you can redistribute it and/or modify it under the terms
// of the GNU Lesser General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// The go-sberex library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License <http://www.gnu.org/licenses/> for more details.

package storage

import (
	"errors"
	"fmt"
	"io"
	"sync"
	"time"

	"github.com/Sberex/go-sberex/log"
)

/*
DPA provides the client API entrypoints Store and Retrieve to store and
retrieve data. It can store anything that has a byte slice representation,
such as files or serialised objects.

Storage: DPA calls the Chunker to segment the input datastream of any size
into a merkle-hashed tree of chunks. The key of the root block is returned
to the client.

Retrieval: given the key of the root block, the DPA retrieves the block
chunks and reconstructs the original data, passing it back as a lazy reader.
A lazy reader is a reader with on-demand delayed processing, i.e. the chunks
needed to reconstruct a large file are only fetched and processed if that
particular part of the document is actually read.

As the chunker produces chunks, DPA dispatches them to its own chunk store
implementation for storage or retrieval.
*/

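// A minimal usage sketch (illustrative only; error handling is elided, the
// caller is assumed to import bytes and sync, and passing a nil write
// WaitGroup is an assumption, not something this file guarantees):
//
//	dpa, _ := NewLocalDPA("/tmp/bzz-test")
//	dpa.Start()
//	defer dpa.Stop()
//
//	data := []byte("hello swarm")
//	swg := &sync.WaitGroup{}
//	key, _ := dpa.Store(bytes.NewReader(data), int64(len(data)), swg, nil)
//	swg.Wait() // wait until all chunks have been committed to the store
//
//	reader := dpa.Retrieve(key)
//	buf := make([]byte, len(data))
//	reader.ReadAt(buf, 0) // lazy: only the chunks covering this range are fetched
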
const (
	storeChanCapacity           = 100
	retrieveChanCapacity        = 100
	singletonSwarmDbCapacity    = 50000
	singletonSwarmCacheCapacity = 500
	maxStoreProcesses           = 8
	maxRetrieveProcesses        = 8
)

var (
	notFound = errors.New("not found")
)

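// DPA ties together a Chunker and a ChunkStore: store and retrieve requests
// are fanned out to worker goroutines over the buffered storeC and retrieveC
// channels, and quitC signals those workers to shut down.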
type DPA struct {
	ChunkStore
	storeC    chan *Chunk
	retrieveC chan *Chunk
	Chunker   Chunker

	lock    sync.Mutex
	running bool
	quitC   chan bool
}

// NewLocalDPA is a convenience constructor for a purely local DPA, intended
// for testing.
func NewLocalDPA(datadir string) (*DPA, error) {
	hash := MakeHashFunc("SHA256")

	dbStore, err := NewDbStore(datadir, hash, singletonSwarmDbCapacity, 0)
	if err != nil {
		return nil, err
	}

	return NewDPA(&LocalStore{
		NewMemStore(dbStore, singletonSwarmCacheCapacity),
		dbStore,
	}, NewChunkerParams()), nil
}

// NewDPA constructs a DPA backed by the given ChunkStore, using a tree
// chunker configured with params.
func NewDPA(store ChunkStore, params *ChunkerParams) *DPA {
	chunker := NewTreeChunker(params)
	return &DPA{
		Chunker:    chunker,
		ChunkStore: store,
	}
}

// Retrieve is the public API main entry point for direct document retrieval,
// used by the FS-aware API and httpaccess.
// Chunk retrieval blocks on netStore requests with a timeout, so the reader
// will report an error if retrieval of chunks within the requested range
// times out.
func (self *DPA) Retrieve(key Key) LazySectionReader {
	return self.Chunker.Join(key, self.retrieveC)
}

// Store is the public API main entry point for direct document storage,
// used by the FS-aware API and httpaccess.
func (self *DPA) Store(data io.Reader, size int64, swg *sync.WaitGroup, wwg *sync.WaitGroup) (key Key, err error) {
	return self.Chunker.Split(data, size, self.storeC, swg, wwg)
}

// Start initialises the store and retrieve channels and spawns the worker
// goroutines. It is a no-op if the DPA is already running.
func (self *DPA) Start() {
	self.lock.Lock()
	defer self.lock.Unlock()
	if self.running {
		return
	}
	self.running = true
	self.retrieveC = make(chan *Chunk, retrieveChanCapacity)
	self.storeC = make(chan *Chunk, storeChanCapacity)
	self.quitC = make(chan bool)
	self.storeLoop()
	self.retrieveLoop()
}

// Stop signals the worker goroutines to quit by closing the quit channel.
func (self *DPA) Stop() {
	self.lock.Lock()
	defer self.lock.Unlock()
	if !self.running {
		return
	}
	self.running = false
	close(self.quitC)
}

// retrieveLoop dispatches the parallel chunk retrieval requests received on
// the retrieve channel to its ChunkStore (NetStore or LocalStore).
func (self *DPA) retrieveLoop() {
	for i := 0; i < maxRetrieveProcesses; i++ {
		go self.retrieveWorker()
	}
	log.Trace(fmt.Sprintf("dpa: retrieve loop spawning %v workers", maxRetrieveProcesses))
}

// retrieveWorker serves requests from the retrieve channel until the channel
// is closed or the quit channel signals shutdown.
func (self *DPA) retrieveWorker() {
	for chunk := range self.retrieveC {
		log.Trace(fmt.Sprintf("dpa: retrieve loop : chunk %v", chunk.Key.Log()))
		storedChunk, err := self.Get(chunk.Key)
		if err == notFound {
			log.Trace(fmt.Sprintf("chunk %v not found", chunk.Key.Log()))
		} else if err != nil {
			log.Trace(fmt.Sprintf("error retrieving chunk %v: %v", chunk.Key.Log(), err))
		} else {
			chunk.SData = storedChunk.SData
			chunk.Size = storedChunk.Size
		}
		// signal the waiting reader that the chunk (or its absence) is resolved
		close(chunk.C)

		select {
		case <-self.quitC:
			return
		default:
		}
	}
}

// storeLoop dispatches the parallel chunk store request processors
// received on the store channel to its ChunkStore (NetStore or LocalStore).
func (self *DPA) storeLoop() {
	for i := 0; i < maxStoreProcesses; i++ {
		go self.storeWorker()
	}
	log.Trace(fmt.Sprintf("dpa: store spawning %v workers", maxStoreProcesses))
}

// storeWorker serves requests from the store channel until the channel is
// closed or the quit channel signals shutdown.
func (self *DPA) storeWorker() {
	for chunk := range self.storeC {
		self.Put(chunk)
		if chunk.wg != nil {
			log.Trace(fmt.Sprintf("dpa: store processor %v", chunk.Key.Log()))
			chunk.wg.Done()
		}
		select {
		case <-self.quitC:
			return
		default:
		}
	}
}

// dpaChunkStore implements the ChunkStore interface. This chunk access layer
// assumes two chunk stores: local storage (e.g. LocalStore) and network
// storage (e.g. NetStore). Access via the network is blocking, with a
// timeout.
type dpaChunkStore struct {
	n          int
	localStore ChunkStore
	netStore   ChunkStore
}

// NewDpaChunkStore creates a chunk store layered over the given local and
// network stores.
func NewDpaChunkStore(localStore, netStore ChunkStore) *dpaChunkStore {
	return &dpaChunkStore{0, localStore, netStore}
}

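// A typical wiring sketch (an assumption based on the constructors in this
// file; NewLocalDPA above shows the purely local variant):
//
//	chunkStore := NewDpaChunkStore(localStore, netStore)
//	dpa := NewDPA(chunkStore, NewChunkerParams())
//	dpa.Start()
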
// Get is the entrypoint for local retrieve requests. It waits for a response
// or times out.
func (self *dpaChunkStore) Get(key Key) (chunk *Chunk, err error) {
	chunk, err = self.netStore.Get(key)
	// guard: a failed netStore.Get must not be dereferenced below
	if chunk == nil {
		if err == nil {
			err = notFound
		}
		return nil, err
	}
	// timeout := time.Now().Add(searchTimeout)
	if chunk.SData != nil {
		log.Trace(fmt.Sprintf("DPA.Get: %v found locally, %d bytes", key.Log(), len(chunk.SData)))
		return
	}
	// TODO: use self.timer time.Timer and reset with defer disableTimer
	timer := time.After(searchTimeout)
	select {
	case <-timer:
		log.Trace(fmt.Sprintf("DPA.Get: %v request timed out", key.Log()))
		err = notFound
	case <-chunk.Req.C:
		log.Trace(fmt.Sprintf("DPA.Get: %v retrieved, %d bytes (%p)", key.Log(), len(chunk.SData), chunk))
	}
	return
}

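// A sketch of the reusable-timer variant the TODO inside Get refers to
// (hypothetical; it assumes a time.Timer field were added to dpaChunkStore):
//
//	timer := time.NewTimer(searchTimeout)
//	defer timer.Stop()
//	select {
//	case <-timer.C:
//		err = notFound
//	case <-chunk.Req.C:
//		// chunk delivered before the deadline
//	}
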
// Put is the entrypoint for local store requests coming from storeLoop.
func (self *dpaChunkStore) Put(entry *Chunk) {
	chunk, err := self.localStore.Get(entry.Key)
	if err != nil {
		log.Trace(fmt.Sprintf("DPA.Put: %v new chunk. call netStore.Put", entry.Key.Log()))
		chunk = entry
	} else if chunk.SData == nil {
		log.Trace(fmt.Sprintf("DPA.Put: %v request entry found", entry.Key.Log()))
		chunk.SData = entry.SData
		chunk.Size = entry.Size
	} else {
		log.Trace(fmt.Sprintf("DPA.Put: %v chunk already known", entry.Key.Log()))
		return
	}
	// from this point on the storage logic is the same as for network storage requests
	log.Trace(fmt.Sprintf("DPA.Put %v: %v", self.n, chunk.Key.Log()))
	self.n++
	self.netStore.Put(chunk)
}

// Close closes the chunk store; it is a no-op here.
func (self *dpaChunkStore) Close() {}