/*
Copyright 2011 Google Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

/*
Package replica registers the "replica" blobserver storage type,
providing synchronous replication to one or more backends.

Writes wait for minWritesForSuccess (default: all). Reads are
attempted in order and not load-balanced, randomized, or raced by
default.

Example config:

      "/repl/": {
          "handler": "storage-replica",
          "handlerArgs": {
              "backends": ["/b1/", "/b2/", "/b3/"],
              "minWritesForSuccess": 2
          }
      },
*/
package replica

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"log"
	"time"

	"camlistore.org/pkg/blob"
	"camlistore.org/pkg/blobserver"
	"camlistore.org/pkg/context"
	"camlistore.org/pkg/jsonconfig"
)

// Compile-time assertion that *replicaStorage implements
// blobserver.Generationer.
var _ blobserver.Generationer = (*replicaStorage)(nil)

// buffered is the capacity given to the result and error channels
// that StatBlobs and RemoveBlobs use when fanning out to the replicas.
const buffered = 8

// replicaStorage is a blobserver storage that fans operations out to
// a set of underlying storages. Writes and removals go to replicas;
// fetches, stats and enumerations go to readReplicas.
type replicaStorage struct {
	// Replicas for writing:
	// replicaPrefixes[i] is the configured name of replicas[i]; it is
	// used to identify the backend in log messages.
	replicaPrefixes []string
	replicas        []blobserver.Storage

	// Replicas for reading:
	// readPrefixes[i] is the configured name of readReplicas[i].
	readPrefixes []string
	readReplicas []blobserver.Storage

	// Minimum number of writes that must succeed before
	// acknowledging success to the client.
	minWritesForSuccess int
}

// NewForTest returns a replicated storage that writes, reads, and
// deletes from all the provided storages.
71 func NewForTest(sto []blobserver.Storage) blobserver.Storage { 72 sto = append([]blobserver.Storage(nil), sto...) // clone 73 names := make([]string, len(sto)) 74 for i := range names { 75 names[i] = "/unknown-prefix/" 76 } 77 return &replicaStorage{ 78 replicaPrefixes: names, 79 replicas: sto, 80 readPrefixes: names, 81 readReplicas: sto, 82 minWritesForSuccess: len(sto), 83 } 84 } 85 86 func newFromConfig(ld blobserver.Loader, config jsonconfig.Obj) (storage blobserver.Storage, err error) { 87 sto := &replicaStorage{ 88 replicaPrefixes: config.RequiredList("backends"), 89 readPrefixes: config.OptionalList("readBackends"), 90 } 91 nReplicas := len(sto.replicaPrefixes) 92 sto.minWritesForSuccess = config.OptionalInt("minWritesForSuccess", nReplicas) 93 if err := config.Validate(); err != nil { 94 return nil, err 95 } 96 if nReplicas == 0 { 97 return nil, errors.New("replica: need at least one replica") 98 } 99 if sto.minWritesForSuccess == 0 { 100 sto.minWritesForSuccess = nReplicas 101 } 102 // readPrefixes defaults to the write prefixes. 103 if len(sto.readPrefixes) == 0 { 104 sto.readPrefixes = sto.replicaPrefixes 105 } 106 107 for _, prefix := range sto.replicaPrefixes { 108 s, err := ld.GetStorage(prefix) 109 if err != nil { 110 // If it's not a storage interface, it might be an http Handler 111 // that also supports being a target (e.g. a sync handler). 112 h, _ := ld.GetHandler(prefix) 113 var ok bool 114 if s, ok = h.(blobserver.Storage); !ok { 115 return nil, err 116 } 117 } 118 sto.replicas = append(sto.replicas, s) 119 } 120 for _, prefix := range sto.readPrefixes { 121 s, err := ld.GetStorage(prefix) 122 if err != nil { 123 return nil, err 124 } 125 sto.readReplicas = append(sto.readReplicas, s) 126 } 127 return sto, nil 128 } 129 130 func (sto *replicaStorage) Fetch(b blob.Ref) (file io.ReadCloser, size uint32, err error) { 131 // TODO: race these? first to respond? 
132 for _, replica := range sto.readReplicas { 133 file, size, err = replica.Fetch(b) 134 if err == nil { 135 return 136 } 137 } 138 return 139 } 140 141 // StatBlobs stats all read replicas. 142 func (sto *replicaStorage) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) error { 143 need := make(map[blob.Ref]bool) 144 for _, br := range blobs { 145 need[br] = true 146 } 147 148 ch := make(chan blob.SizedRef, buffered) 149 donec := make(chan bool) 150 151 go func() { 152 for sb := range ch { 153 if need[sb.Ref] { 154 dest <- sb 155 delete(need, sb.Ref) 156 } 157 } 158 donec <- true 159 }() 160 161 errc := make(chan error, buffered) 162 statReplica := func(s blobserver.Storage) { 163 errc <- s.StatBlobs(ch, blobs) 164 } 165 166 for _, replica := range sto.readReplicas { 167 go statReplica(replica) 168 } 169 170 var retErr error 171 for _ = range sto.readReplicas { 172 if err := <-errc; err != nil { 173 retErr = err 174 } 175 } 176 close(ch) 177 <-donec 178 179 // Safe to access need map now; as helper goroutine is 180 // done with it. 181 if len(need) == 0 { 182 return nil 183 } 184 return retErr 185 } 186 187 type sizedBlobAndError struct { 188 idx int 189 sb blob.SizedRef 190 err error 191 } 192 193 func (sto *replicaStorage) ReceiveBlob(br blob.Ref, src io.Reader) (_ blob.SizedRef, err error) { 194 // Slurp the whole blob before replicating. Bounded by 16 MB anyway. 
195 var buf bytes.Buffer 196 size, err := io.Copy(&buf, src) 197 if err != nil { 198 return 199 } 200 201 nReplicas := len(sto.replicas) 202 resc := make(chan sizedBlobAndError, nReplicas) 203 uploadToReplica := func(idx int, dst blobserver.BlobReceiver) { 204 // Using ReceiveNoHash because it's already been 205 // verified implicitly by the io.Copy above: 206 sb, err := blobserver.ReceiveNoHash(dst, br, bytes.NewReader(buf.Bytes())) 207 resc <- sizedBlobAndError{idx, sb, err} 208 } 209 for idx, replica := range sto.replicas { 210 go uploadToReplica(idx, replica) 211 } 212 213 nSuccess := 0 214 var fails []sizedBlobAndError 215 for _ = range sto.replicas { 216 res := <-resc 217 switch { 218 case res.err == nil && int64(res.sb.Size) == size: 219 nSuccess++ 220 if nSuccess == sto.minWritesForSuccess { 221 return res.sb, nil 222 } 223 case res.err == nil: 224 err = fmt.Errorf("replica: upload shard reported size %d, expected %d", res.sb.Size, size) 225 res.err = err 226 fails = append(fails, res) 227 default: 228 err = res.err 229 fails = append(fails, res) 230 } 231 } 232 for _, res := range fails { 233 log.Printf("replica: receiving blob %v, %d successes, %d failures; backend %s reported: %v", 234 br, 235 nSuccess, len(fails), 236 sto.replicaPrefixes[res.idx], res.err) 237 } 238 return 239 } 240 241 func (sto *replicaStorage) RemoveBlobs(blobs []blob.Ref) error { 242 errch := make(chan error, buffered) 243 removeFrom := func(s blobserver.Storage) { 244 errch <- s.RemoveBlobs(blobs) 245 } 246 for _, replica := range sto.replicas { 247 go removeFrom(replica) 248 } 249 var reterr error 250 nSuccess := 0 251 for _ = range sto.replicas { 252 if err := <-errch; err != nil { 253 reterr = err 254 } else { 255 nSuccess++ 256 } 257 } 258 if nSuccess > 0 { 259 // TODO: decide on the return value. for now this is best 260 // effort and we return nil if any of the blobservers said 261 // success. maybe a bit weird, though. 
262 return nil 263 } 264 return reterr 265 } 266 267 func (sto *replicaStorage) EnumerateBlobs(ctx *context.Context, dest chan<- blob.SizedRef, after string, limit int) error { 268 return blobserver.MergedEnumerate(ctx, dest, sto.readReplicas, after, limit) 269 } 270 271 func (sto *replicaStorage) ResetStorageGeneration() error { 272 var ret error 273 n := 0 274 for _, replica := range sto.replicas { 275 if g, ok := replica.(blobserver.Generationer); ok { 276 n++ 277 if err := g.ResetStorageGeneration(); err != nil && ret == nil { 278 ret = err 279 } 280 } 281 } 282 if n == 0 { 283 return errors.New("ResetStorageGeneration not supported") 284 } 285 return ret 286 } 287 288 func (sto *replicaStorage) StorageGeneration() (initTime time.Time, random string, err error) { 289 var buf bytes.Buffer 290 n := 0 291 for _, replica := range sto.replicas { 292 if g, ok := replica.(blobserver.Generationer); ok { 293 n++ 294 rt, rrand, rerr := g.StorageGeneration() 295 if rerr != nil { 296 err = rerr 297 } else { 298 if rt.After(initTime) { 299 // Returning the max of all initialization times. 300 // TODO: not sure whether max or min makes more sense. 301 initTime = rt 302 } 303 if buf.Len() != 0 { 304 buf.WriteByte('/') 305 } 306 buf.WriteString(rrand) 307 } 308 } 309 } 310 if n == 0 { 311 err = errors.New("No replicas support StorageGeneration") 312 } 313 return initTime, buf.String(), err 314 } 315 316 func init() { 317 blobserver.RegisterStorageConstructor("replica", blobserver.StorageConstructor(newFromConfig)) 318 }