github.com/alanchchen/go-ethereum@v1.6.6-0.20170601190819-6171d01b1195/swarm/storage/dpa.go (about)

     1  // Copyright 2016 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package storage
    18  
    19  import (
    20  	"errors"
    21  	"fmt"
    22  	"io"
    23  	"sync"
    24  	"time"
    25  
    26  	"github.com/ethereum/go-ethereum/log"
    27  )
    28  
    29  /*
    30  DPA provides the client API entrypoints Store and Retrieve to store and retrieve data.
    31  It can store anything that has a byte slice representation, so files or serialised objects etc.
    32  
    33  Storage: DPA calls the Chunker to segment the input datastream of any size to a merkle hashed tree of chunks. The key of the root block is returned to the client.
    34  
    35  Retrieval: given the key of the root block, the DPA retrieves the block chunks and reconstructs the original data and passes it back as a lazy reader. A lazy reader is a reader with on-demand delayed processing, i.e. the chunks needed to reconstruct a large file are only fetched and processed if that particular part of the document is actually read.
    36  
    37  As the chunker produces chunks, DPA dispatches them to its own chunk store
    38  implementation for storage or retrieval.
    39  */
    40  
const (
	// Buffer capacities of the dispatch channels created in DPA.Start.
	storeChanCapacity    = 100
	retrieveChanCapacity = 100
	// Capacities used by NewLocalDPA for its db store and memory cache.
	singletonSwarmDbCapacity    = 50000
	singletonSwarmCacheCapacity = 500
	// Number of worker goroutines spawned by storeLoop / retrieveLoop.
	maxStoreProcesses    = 8
	maxRetrieveProcesses = 8
)
    49  
var (
	// notFound is the sentinel error compared against in retrieveWorker and
	// assigned on retrieval timeout in dpaChunkStore.Get.
	notFound = errors.New("not found")
)
    53  
// DPA couples a Chunker with a ChunkStore: Split output and Join requests
// are dispatched over the channels below to the worker pools started by Start.
type DPA struct {
	ChunkStore                 // backing chunk storage (e.g. LocalStore or NetStore)
	storeC    chan *Chunk      // chunks produced by Chunker.Split, consumed by storeWorker
	retrieveC chan *Chunk      // requests issued by Chunker.Join, consumed by retrieveWorker
	Chunker   Chunker          // splits/joins document data into/from chunk trees

	lock    sync.Mutex      // guards running, storeC, retrieveC, quitC in Start/Stop
	running bool            // true between Start and Stop
	wg      *sync.WaitGroup // NOTE(review): not used anywhere in this file — confirm before relying on it
	quitC   chan bool       // closed by Stop to signal the worker goroutines
}
    65  
    66  // for testing locally
    67  func NewLocalDPA(datadir string) (*DPA, error) {
    68  
    69  	hash := MakeHashFunc("SHA256")
    70  
    71  	dbStore, err := NewDbStore(datadir, hash, singletonSwarmDbCapacity, 0)
    72  	if err != nil {
    73  		return nil, err
    74  	}
    75  
    76  	return NewDPA(&LocalStore{
    77  		NewMemStore(dbStore, singletonSwarmCacheCapacity),
    78  		dbStore,
    79  	}, NewChunkerParams()), nil
    80  }
    81  
    82  func NewDPA(store ChunkStore, params *ChunkerParams) *DPA {
    83  	chunker := NewTreeChunker(params)
    84  	return &DPA{
    85  		Chunker:    chunker,
    86  		ChunkStore: store,
    87  	}
    88  }
    89  
    90  // Public API. Main entry point for document retrieval directly. Used by the
    91  // FS-aware API and httpaccess
    92  // Chunk retrieval blocks on netStore requests with a timeout so reader will
    93  // report error if retrieval of chunks within requested range time out.
    94  func (self *DPA) Retrieve(key Key) LazySectionReader {
    95  	return self.Chunker.Join(key, self.retrieveC)
    96  }
    97  
    98  // Public API. Main entry point for document storage directly. Used by the
    99  // FS-aware API and httpaccess
   100  func (self *DPA) Store(data io.Reader, size int64, swg *sync.WaitGroup, wwg *sync.WaitGroup) (key Key, err error) {
   101  	return self.Chunker.Split(data, size, self.storeC, swg, wwg)
   102  }
   103  
   104  func (self *DPA) Start() {
   105  	self.lock.Lock()
   106  	defer self.lock.Unlock()
   107  	if self.running {
   108  		return
   109  	}
   110  	self.running = true
   111  	self.retrieveC = make(chan *Chunk, retrieveChanCapacity)
   112  	self.storeC = make(chan *Chunk, storeChanCapacity)
   113  	self.quitC = make(chan bool)
   114  	self.storeLoop()
   115  	self.retrieveLoop()
   116  }
   117  
   118  func (self *DPA) Stop() {
   119  	self.lock.Lock()
   120  	defer self.lock.Unlock()
   121  	if !self.running {
   122  		return
   123  	}
   124  	self.running = false
   125  	close(self.quitC)
   126  }
   127  
   128  // retrieveLoop dispatches the parallel chunk retrieval requests received on the
   129  // retrieve channel to its ChunkStore  (NetStore or LocalStore)
   130  func (self *DPA) retrieveLoop() {
   131  	for i := 0; i < maxRetrieveProcesses; i++ {
   132  		go self.retrieveWorker()
   133  	}
   134  	log.Trace(fmt.Sprintf("dpa: retrieve loop spawning %v workers", maxRetrieveProcesses))
   135  }
   136  
   137  func (self *DPA) retrieveWorker() {
   138  	for chunk := range self.retrieveC {
   139  		log.Trace(fmt.Sprintf("dpa: retrieve loop : chunk %v", chunk.Key.Log()))
   140  		storedChunk, err := self.Get(chunk.Key)
   141  		if err == notFound {
   142  			log.Trace(fmt.Sprintf("chunk %v not found", chunk.Key.Log()))
   143  		} else if err != nil {
   144  			log.Trace(fmt.Sprintf("error retrieving chunk %v: %v", chunk.Key.Log(), err))
   145  		} else {
   146  			chunk.SData = storedChunk.SData
   147  			chunk.Size = storedChunk.Size
   148  		}
   149  		close(chunk.C)
   150  
   151  		select {
   152  		case <-self.quitC:
   153  			return
   154  		default:
   155  		}
   156  	}
   157  }
   158  
   159  // storeLoop dispatches the parallel chunk store request processors
   160  // received on the store channel to its ChunkStore (NetStore or LocalStore)
   161  func (self *DPA) storeLoop() {
   162  	for i := 0; i < maxStoreProcesses; i++ {
   163  		go self.storeWorker()
   164  	}
   165  	log.Trace(fmt.Sprintf("dpa: store spawning %v workers", maxStoreProcesses))
   166  }
   167  
   168  func (self *DPA) storeWorker() {
   169  
   170  	for chunk := range self.storeC {
   171  		self.Put(chunk)
   172  		if chunk.wg != nil {
   173  			log.Trace(fmt.Sprintf("dpa: store processor %v", chunk.Key.Log()))
   174  			chunk.wg.Done()
   175  
   176  		}
   177  		select {
   178  		case <-self.quitC:
   179  			return
   180  		default:
   181  		}
   182  	}
   183  }
   184  
// dpaChunkStore implements the ChunkStore interface.
// This chunk access layer assumes 2 chunk stores:
// local storage (e.g. LocalStore) and network storage (e.g. NetStore);
// access by calling the network is blocking with a timeout.
type dpaChunkStore struct {
	n          int        // running count of chunks forwarded to netStore.Put (diagnostics only)
	localStore ChunkStore // consulted first on Put to detect known/requested chunks
	netStore   ChunkStore // network-backed store; serves Get and final Put
}
   195  
   196  func NewDpaChunkStore(localStore, netStore ChunkStore) *dpaChunkStore {
   197  	return &dpaChunkStore{0, localStore, netStore}
   198  }
   199  
   200  // Get is the entrypoint for local retrieve requests
   201  // waits for response or times out
   202  func (self *dpaChunkStore) Get(key Key) (chunk *Chunk, err error) {
   203  	chunk, err = self.netStore.Get(key)
   204  	// timeout := time.Now().Add(searchTimeout)
   205  	if chunk.SData != nil {
   206  		log.Trace(fmt.Sprintf("DPA.Get: %v found locally, %d bytes", key.Log(), len(chunk.SData)))
   207  		return
   208  	}
   209  	// TODO: use self.timer time.Timer and reset with defer disableTimer
   210  	timer := time.After(searchTimeout)
   211  	select {
   212  	case <-timer:
   213  		log.Trace(fmt.Sprintf("DPA.Get: %v request time out ", key.Log()))
   214  		err = notFound
   215  	case <-chunk.Req.C:
   216  		log.Trace(fmt.Sprintf("DPA.Get: %v retrieved, %d bytes (%p)", key.Log(), len(chunk.SData), chunk))
   217  	}
   218  	return
   219  }
   220  
   221  // Put is the entrypoint for local store requests coming from storeLoop
   222  func (self *dpaChunkStore) Put(entry *Chunk) {
   223  	chunk, err := self.localStore.Get(entry.Key)
   224  	if err != nil {
   225  		log.Trace(fmt.Sprintf("DPA.Put: %v new chunk. call netStore.Put", entry.Key.Log()))
   226  		chunk = entry
   227  	} else if chunk.SData == nil {
   228  		log.Trace(fmt.Sprintf("DPA.Put: %v request entry found", entry.Key.Log()))
   229  		chunk.SData = entry.SData
   230  		chunk.Size = entry.Size
   231  	} else {
   232  		log.Trace(fmt.Sprintf("DPA.Put: %v chunk already known", entry.Key.Log()))
   233  		return
   234  	}
   235  	// from this point on the storage logic is the same with network storage requests
   236  	log.Trace(fmt.Sprintf("DPA.Put %v: %v", self.n, chunk.Key.Log()))
   237  	self.n++
   238  	self.netStore.Put(chunk)
   239  }
   240  
   241  // Close chunk store
   242  func (self *dpaChunkStore) Close() {
   243  	return
   244  }