github.com/olivere/camlistore@v0.0.0-20140121221811-1b7ac2da0199/pkg/blob/fetcher.go (about)

     1  /*
     2  Copyright 2011 Google Inc.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package blob
    18  
    19  import (
    20  	"bytes"
    21  	"crypto"
    22  	"errors"
    23  	"fmt"
    24  	"io"
    25  	"io/ioutil"
    26  	"os"
    27  	"strings"
    28  	"sync"
    29  
    30  	"camlistore.org/pkg/constants"
    31  	"camlistore.org/pkg/types"
    32  )
    33  
    34  // TODO: rename StreamingFetcher to be Fetcher (the common case)
    35  
// TODO: add FetcherAt / FetchAt (for HTTP range requests).  But then how
// do we make every SeekFetcher also be a FetcherAt? By hand?
    38  
// SeekFetcher is the interface for retrieving a blob as a seekable
// stream, together with its size.
type SeekFetcher interface {
	// Fetch returns a blob.  If the blob is not found then
	// os.ErrNotExist should be returned for the error (not a wrapped
	// error with a ErrNotExist inside)
	//
	// The caller should close blob.
	Fetch(Ref) (blob types.ReadSeekCloser, size int64, err error)
}
    47  
// SeekTester is the interface implemented by storage implementations that don't
// know until runtime whether or not their StreamingFetcher happens to also
// return a ReadCloser that's also a ReadSeekCloser.
type SeekTester interface {
	// IsFetcherASeeker reports whether the ReadClosers returned by the
	// implementation's FetchStreaming can also be used as
	// ReadSeekClosers.
	IsFetcherASeeker() bool
}
    54  
    55  // fetcherToSeekerWrapper wraps a StreamingFetcher and converts it into
    56  // a SeekFetcher if SeekTester has confirmed the interface conversion
    57  // is safe.
    58  type fetcherToSeekerWrapper struct {
    59  	StreamingFetcher
    60  }
    61  
    62  func (w *fetcherToSeekerWrapper) Fetch(r Ref) (file types.ReadSeekCloser, size int64, err error) {
    63  	rc, size, err := w.StreamingFetcher.FetchStreaming(r)
    64  	if err != nil {
    65  		return
    66  	}
    67  	file, ok := rc.(types.ReadSeekCloser)
    68  	if ok {
    69  		return
    70  	}
    71  	// we must make it seekable
    72  	var slurp bytes.Buffer
    73  	n, err := io.CopyN(&slurp, rc, constants.MaxBlobSize+1)
    74  	if err != nil && err != io.EOF {
    75  		return nil, 0, err
    76  	}
    77  	if n > constants.MaxBlobSize {
    78  		return nil, 0, fmt.Errorf("blob %v too big", r)
    79  	}
    80  	return struct {
    81  		io.ReadSeeker
    82  		io.Closer
    83  	}{
    84  		bytes.NewReader(slurp.Bytes()),
    85  		ioutil.NopCloser(nil),
    86  	}, n, err
    87  }
    88  
// StreamingFetcher is the minimal interface for retrieving a blob from storage.
// The full storage interface is blobserver.Storage.
type StreamingFetcher interface {
	// FetchStreaming returns a blob.  If the blob is not found then
	// os.ErrNotExist should be returned for the error (not a wrapped
	// error with a ErrNotExist inside)
	//
	// The caller should close blob.
	FetchStreaming(Ref) (blob io.ReadCloser, size int64, err error)
}
    99  
   100  func NewSerialFetcher(fetchers ...SeekFetcher) SeekFetcher {
   101  	return &serialFetcher{fetchers}
   102  }
   103  
   104  func NewSerialStreamingFetcher(fetchers ...StreamingFetcher) StreamingFetcher {
   105  	return &serialStreamingFetcher{fetchers}
   106  }
   107  
   108  func NewSimpleDirectoryFetcher(dir string) *DirFetcher {
   109  	return &DirFetcher{dir, "camli"}
   110  }
   111  
   112  type serialFetcher struct {
   113  	fetchers []SeekFetcher
   114  }
   115  
   116  func (sf *serialFetcher) Fetch(r Ref) (file types.ReadSeekCloser, size int64, err error) {
   117  	for _, fetcher := range sf.fetchers {
   118  		file, size, err = fetcher.Fetch(r)
   119  		if err == nil {
   120  			return
   121  		}
   122  	}
   123  	return
   124  
   125  }
   126  
   127  type serialStreamingFetcher struct {
   128  	fetchers []StreamingFetcher
   129  }
   130  
   131  func (sf *serialStreamingFetcher) FetchStreaming(r Ref) (file io.ReadCloser, size int64, err error) {
   132  	for _, fetcher := range sf.fetchers {
   133  		file, size, err = fetcher.FetchStreaming(r)
   134  		if err == nil {
   135  			return
   136  		}
   137  	}
   138  	return
   139  }
   140  
   141  type DirFetcher struct {
   142  	directory, extension string
   143  }
   144  
   145  func (df *DirFetcher) FetchStreaming(r Ref) (file io.ReadCloser, size int64, err error) {
   146  	return df.Fetch(r)
   147  }
   148  
   149  func (df *DirFetcher) Fetch(r Ref) (file types.ReadSeekCloser, size int64, err error) {
   150  	fileName := fmt.Sprintf("%s/%s.%s", df.directory, r.String(), df.extension)
   151  	var stat os.FileInfo
   152  	stat, err = os.Stat(fileName)
   153  	if err != nil {
   154  		return
   155  	}
   156  	file, err = os.Open(fileName)
   157  	if err != nil {
   158  		return
   159  	}
   160  	size = stat.Size()
   161  	return
   162  }
   163  
   164  // MemoryStore stores blobs in memory and is a Fetcher and
   165  // StreamingFetcher. Its zero value is usable.
   166  type MemoryStore struct {
   167  	lk sync.Mutex
   168  	m  map[string]string
   169  }
   170  
   171  func (s *MemoryStore) AddBlob(hashtype crypto.Hash, data string) (Ref, error) {
   172  	if hashtype != crypto.SHA1 {
   173  		return Ref{}, errors.New("blobref: unsupported hash type")
   174  	}
   175  	hash := hashtype.New()
   176  	hash.Write([]byte(data))
   177  	bstr := fmt.Sprintf("sha1-%x", hash.Sum(nil))
   178  	s.lk.Lock()
   179  	defer s.lk.Unlock()
   180  	if s.m == nil {
   181  		s.m = make(map[string]string)
   182  	}
   183  	s.m[bstr] = data
   184  	return MustParse(bstr), nil
   185  }
   186  
   187  func (s *MemoryStore) FetchStreaming(b Ref) (file io.ReadCloser, size int64, err error) {
   188  	s.lk.Lock()
   189  	defer s.lk.Unlock()
   190  	if s.m == nil {
   191  		return nil, 0, os.ErrNotExist
   192  	}
   193  	str, ok := s.m[b.String()]
   194  	if !ok {
   195  		return nil, 0, os.ErrNotExist
   196  	}
   197  	return ioutil.NopCloser(strings.NewReader(str)), int64(len(str)), nil
   198  }
   199  
   200  // SeekerFromStreamingFetcher returns the most efficient implementation of a seeking fetcher
   201  // from a provided streaming fetcher.
   202  func SeekerFromStreamingFetcher(f StreamingFetcher) SeekFetcher {
   203  	if sk, ok := f.(SeekFetcher); ok {
   204  		return sk
   205  	}
   206  	if tester, ok := f.(SeekTester); ok && tester.IsFetcherASeeker() {
   207  		return &fetcherToSeekerWrapper{f}
   208  	}
   209  	return bufferingSeekFetcherWrapper{f}
   210  }
   211  
   212  // bufferingSeekFetcherWrapper is a SeekFetcher that implements
   213  // seeking on a wrapped streaming-only fetcher by buffering the
   214  // content into memory, optionally spilling to disk if local disk is
   215  // available.  In practice, most blobs will be "small" (able to fit in
   216  // memory).
   217  type bufferingSeekFetcherWrapper struct {
   218  	sf StreamingFetcher
   219  }
   220  
   221  func (b bufferingSeekFetcherWrapper) Fetch(br Ref) (rsc types.ReadSeekCloser, size int64, err error) {
   222  	rc, size, err := b.sf.FetchStreaming(br)
   223  	if err != nil {
   224  		return nil, 0, err
   225  	}
   226  	defer rc.Close()
   227  
   228  	const tryDiskThreshold = 32 << 20
   229  	if size > tryDiskThreshold {
   230  		// TODO(bradfitz): disk spilling, if a temp file can be made
   231  	}
   232  
   233  	// Buffer all to memory
   234  	var buf bytes.Buffer
   235  	n, err := io.Copy(&buf, rc)
   236  	if err != nil {
   237  		return nil, 0, fmt.Errorf("Error reading blob %s: %v", br, err)
   238  	}
   239  	if n != size {
   240  		return nil, 0, fmt.Errorf("Read %d bytes of %s; expected %s", n, br, size)
   241  	}
   242  	return struct {
   243  		io.ReadSeeker
   244  		io.Closer
   245  	}{
   246  		ReadSeeker: io.NewSectionReader(bytes.NewReader(buf.Bytes()), 0, size),
   247  		Closer:     ioutil.NopCloser(nil),
   248  	}, size, nil
   249  }