github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/pkg/blobserver/replica/replica.go (about)

     1  /*
     2  Copyright 2011 Google Inc.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  /*
    18  Package replica registers the "replica" blobserver storage type,
    19  providing synchronous replication to one more backends.
    20  
    21  Writes wait for minWritesForSuccess (default: all). Reads are
    22  attempted in order and not load-balanced, randomized, or raced by
    23  default.
    24  
    25  Example config:
    26  
    27        "/repl/": {
    28            "handler": "storage-replica",
    29            "handlerArgs": {
    30                "backends": ["/b1/", "/b2/", "/b3/"],
    31                "minWritesForSuccess": 2
    32            }
    33        },
    34  */
    35  package replica
    36  
    37  import (
    38  	"bytes"
    39  	"errors"
    40  	"fmt"
    41  	"io"
    42  	"log"
    43  	"time"
    44  
    45  	"camlistore.org/pkg/blob"
    46  	"camlistore.org/pkg/blobserver"
    47  	"camlistore.org/pkg/context"
    48  	"camlistore.org/pkg/jsonconfig"
    49  )
    50  
    51  var _ blobserver.Generationer = (*replicaStorage)(nil)
    52  
    53  const buffered = 8
    54  
    55  type replicaStorage struct {
    56  	// Replicas for writing:
    57  	replicaPrefixes []string
    58  	replicas        []blobserver.Storage
    59  
    60  	// Replicas for reading:
    61  	readPrefixes []string
    62  	readReplicas []blobserver.Storage
    63  
    64  	// Minimum number of writes that must succeed before
    65  	// acknowledging success to the client.
    66  	minWritesForSuccess int
    67  }
    68  
    69  // NewForTest returns a replicated storage that writes, reads, and
    70  // deletes from all the provided storages.
    71  func NewForTest(sto []blobserver.Storage) blobserver.Storage {
    72  	sto = append([]blobserver.Storage(nil), sto...) // clone
    73  	names := make([]string, len(sto))
    74  	for i := range names {
    75  		names[i] = "/unknown-prefix/"
    76  	}
    77  	return &replicaStorage{
    78  		replicaPrefixes:     names,
    79  		replicas:            sto,
    80  		readPrefixes:        names,
    81  		readReplicas:        sto,
    82  		minWritesForSuccess: len(sto),
    83  	}
    84  }
    85  
    86  func newFromConfig(ld blobserver.Loader, config jsonconfig.Obj) (storage blobserver.Storage, err error) {
    87  	sto := &replicaStorage{
    88  		replicaPrefixes: config.RequiredList("backends"),
    89  		readPrefixes:    config.OptionalList("readBackends"),
    90  	}
    91  	nReplicas := len(sto.replicaPrefixes)
    92  	sto.minWritesForSuccess = config.OptionalInt("minWritesForSuccess", nReplicas)
    93  	if err := config.Validate(); err != nil {
    94  		return nil, err
    95  	}
    96  	if nReplicas == 0 {
    97  		return nil, errors.New("replica: need at least one replica")
    98  	}
    99  	if sto.minWritesForSuccess == 0 {
   100  		sto.minWritesForSuccess = nReplicas
   101  	}
   102  	// readPrefixes defaults to the write prefixes.
   103  	if len(sto.readPrefixes) == 0 {
   104  		sto.readPrefixes = sto.replicaPrefixes
   105  	}
   106  
   107  	for _, prefix := range sto.replicaPrefixes {
   108  		s, err := ld.GetStorage(prefix)
   109  		if err != nil {
   110  			// If it's not a storage interface, it might be an http Handler
   111  			// that also supports being a target (e.g. a sync handler).
   112  			h, _ := ld.GetHandler(prefix)
   113  			var ok bool
   114  			if s, ok = h.(blobserver.Storage); !ok {
   115  				return nil, err
   116  			}
   117  		}
   118  		sto.replicas = append(sto.replicas, s)
   119  	}
   120  	for _, prefix := range sto.readPrefixes {
   121  		s, err := ld.GetStorage(prefix)
   122  		if err != nil {
   123  			return nil, err
   124  		}
   125  		sto.readReplicas = append(sto.readReplicas, s)
   126  	}
   127  	return sto, nil
   128  }
   129  
   130  func (sto *replicaStorage) Fetch(b blob.Ref) (file io.ReadCloser, size uint32, err error) {
   131  	// TODO: race these? first to respond?
   132  	for _, replica := range sto.readReplicas {
   133  		file, size, err = replica.Fetch(b)
   134  		if err == nil {
   135  			return
   136  		}
   137  	}
   138  	return
   139  }
   140  
   141  // StatBlobs stats all read replicas.
   142  func (sto *replicaStorage) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) error {
   143  	need := make(map[blob.Ref]bool)
   144  	for _, br := range blobs {
   145  		need[br] = true
   146  	}
   147  
   148  	ch := make(chan blob.SizedRef, buffered)
   149  	donec := make(chan bool)
   150  
   151  	go func() {
   152  		for sb := range ch {
   153  			if need[sb.Ref] {
   154  				dest <- sb
   155  				delete(need, sb.Ref)
   156  			}
   157  		}
   158  		donec <- true
   159  	}()
   160  
   161  	errc := make(chan error, buffered)
   162  	statReplica := func(s blobserver.Storage) {
   163  		errc <- s.StatBlobs(ch, blobs)
   164  	}
   165  
   166  	for _, replica := range sto.readReplicas {
   167  		go statReplica(replica)
   168  	}
   169  
   170  	var retErr error
   171  	for _ = range sto.readReplicas {
   172  		if err := <-errc; err != nil {
   173  			retErr = err
   174  		}
   175  	}
   176  	close(ch)
   177  	<-donec
   178  
   179  	// Safe to access need map now; as helper goroutine is
   180  	// done with it.
   181  	if len(need) == 0 {
   182  		return nil
   183  	}
   184  	return retErr
   185  }
   186  
   187  type sizedBlobAndError struct {
   188  	idx int
   189  	sb  blob.SizedRef
   190  	err error
   191  }
   192  
   193  func (sto *replicaStorage) ReceiveBlob(br blob.Ref, src io.Reader) (_ blob.SizedRef, err error) {
   194  	// Slurp the whole blob before replicating. Bounded by 16 MB anyway.
   195  	var buf bytes.Buffer
   196  	size, err := io.Copy(&buf, src)
   197  	if err != nil {
   198  		return
   199  	}
   200  
   201  	nReplicas := len(sto.replicas)
   202  	resc := make(chan sizedBlobAndError, nReplicas)
   203  	uploadToReplica := func(idx int, dst blobserver.BlobReceiver) {
   204  		// Using ReceiveNoHash because it's already been
   205  		// verified implicitly by the io.Copy above:
   206  		sb, err := blobserver.ReceiveNoHash(dst, br, bytes.NewReader(buf.Bytes()))
   207  		resc <- sizedBlobAndError{idx, sb, err}
   208  	}
   209  	for idx, replica := range sto.replicas {
   210  		go uploadToReplica(idx, replica)
   211  	}
   212  
   213  	nSuccess := 0
   214  	var fails []sizedBlobAndError
   215  	for _ = range sto.replicas {
   216  		res := <-resc
   217  		switch {
   218  		case res.err == nil && int64(res.sb.Size) == size:
   219  			nSuccess++
   220  			if nSuccess == sto.minWritesForSuccess {
   221  				return res.sb, nil
   222  			}
   223  		case res.err == nil:
   224  			err = fmt.Errorf("replica: upload shard reported size %d, expected %d", res.sb.Size, size)
   225  			res.err = err
   226  			fails = append(fails, res)
   227  		default:
   228  			err = res.err
   229  			fails = append(fails, res)
   230  		}
   231  	}
   232  	for _, res := range fails {
   233  		log.Printf("replica: receiving blob %v, %d successes, %d failures; backend %s reported: %v",
   234  			br,
   235  			nSuccess, len(fails),
   236  			sto.replicaPrefixes[res.idx], res.err)
   237  	}
   238  	return
   239  }
   240  
   241  func (sto *replicaStorage) RemoveBlobs(blobs []blob.Ref) error {
   242  	errch := make(chan error, buffered)
   243  	removeFrom := func(s blobserver.Storage) {
   244  		errch <- s.RemoveBlobs(blobs)
   245  	}
   246  	for _, replica := range sto.replicas {
   247  		go removeFrom(replica)
   248  	}
   249  	var reterr error
   250  	nSuccess := 0
   251  	for _ = range sto.replicas {
   252  		if err := <-errch; err != nil {
   253  			reterr = err
   254  		} else {
   255  			nSuccess++
   256  		}
   257  	}
   258  	if nSuccess > 0 {
   259  		// TODO: decide on the return value. for now this is best
   260  		// effort and we return nil if any of the blobservers said
   261  		// success.  maybe a bit weird, though.
   262  		return nil
   263  	}
   264  	return reterr
   265  }
   266  
   267  func (sto *replicaStorage) EnumerateBlobs(ctx *context.Context, dest chan<- blob.SizedRef, after string, limit int) error {
   268  	return blobserver.MergedEnumerate(ctx, dest, sto.readReplicas, after, limit)
   269  }
   270  
   271  func (sto *replicaStorage) ResetStorageGeneration() error {
   272  	var ret error
   273  	n := 0
   274  	for _, replica := range sto.replicas {
   275  		if g, ok := replica.(blobserver.Generationer); ok {
   276  			n++
   277  			if err := g.ResetStorageGeneration(); err != nil && ret == nil {
   278  				ret = err
   279  			}
   280  		}
   281  	}
   282  	if n == 0 {
   283  		return errors.New("ResetStorageGeneration not supported")
   284  	}
   285  	return ret
   286  }
   287  
   288  func (sto *replicaStorage) StorageGeneration() (initTime time.Time, random string, err error) {
   289  	var buf bytes.Buffer
   290  	n := 0
   291  	for _, replica := range sto.replicas {
   292  		if g, ok := replica.(blobserver.Generationer); ok {
   293  			n++
   294  			rt, rrand, rerr := g.StorageGeneration()
   295  			if rerr != nil {
   296  				err = rerr
   297  			} else {
   298  				if rt.After(initTime) {
   299  					// Returning the max of all initialization times.
   300  					// TODO: not sure whether max or min makes more sense.
   301  					initTime = rt
   302  				}
   303  				if buf.Len() != 0 {
   304  					buf.WriteByte('/')
   305  				}
   306  				buf.WriteString(rrand)
   307  			}
   308  		}
   309  	}
   310  	if n == 0 {
   311  		err = errors.New("No replicas support StorageGeneration")
   312  	}
   313  	return initTime, buf.String(), err
   314  }
   315  
   316  func init() {
   317  	blobserver.RegisterStorageConstructor("replica", blobserver.StorageConstructor(newFromConfig))
   318  }