github.com/olivere/camlistore@v0.0.0-20140121221811-1b7ac2da0199/cmd/camtool/sync.go (about) 1 /* 2 Copyright 2013 The Camlistore Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package main 18 19 import ( 20 "errors" 21 "flag" 22 "fmt" 23 "log" 24 "net/http" 25 "os" 26 "strconv" 27 "strings" 28 "time" 29 30 "camlistore.org/pkg/blob" 31 "camlistore.org/pkg/blobserver" 32 "camlistore.org/pkg/blobserver/localdisk" 33 "camlistore.org/pkg/client" 34 "camlistore.org/pkg/cmdmain" 35 "camlistore.org/pkg/context" 36 ) 37 38 type syncCmd struct { 39 src string 40 dest string 41 third string 42 43 loop bool 44 verbose bool 45 all bool 46 removeSrc bool 47 wipe bool 48 insecureTLS bool 49 50 logger *log.Logger 51 } 52 53 func init() { 54 cmdmain.RegisterCommand("sync", func(flags *flag.FlagSet) cmdmain.CommandRunner { 55 cmd := new(syncCmd) 56 flags.StringVar(&cmd.src, "src", "", "Source blobserver is either a URL prefix (with optional path), a host[:port], a path (starting with /, ./, or ../), or blank to use the Camlistore client config's default host.") 57 flags.StringVar(&cmd.dest, "dest", "", "Destination blobserver (same format as src), or 'stdout' to just enumerate the --src blobs to stdout.") 58 flags.StringVar(&cmd.third, "thirdleg", "", "Copy blobs present in source but missing from destination to this 'third leg' blob store, instead of the destination. (same format as src)") 59 60 flags.BoolVar(&cmd.loop, "loop", false, "Create an associate a new permanode for the uploaded file or directory.") 61 flags.BoolVar(&cmd.verbose, "verbose", false, "Be verbose.") 62 flags.BoolVar(&cmd.wipe, "wipe", false, "If dest is an index, drop it and repopulate it from scratch. NOOP for now.") 63 flags.BoolVar(&cmd.all, "all", false, "Discover all sync destinations configured on the source server and run them.") 64 flags.BoolVar(&cmd.removeSrc, "removesrc", false, "Remove each blob from the source after syncing to the destination; for queue processing.") 65 // TODO(mpl): maybe move this flag up to the client pkg as an AddFlag, as it can be used by all commands. 66 if debug, _ := strconv.ParseBool(os.Getenv("CAMLI_DEBUG")); debug { 67 flags.BoolVar(&cmd.insecureTLS, "insecure", false, "If set, when using TLS, the server's certificates verification is disabled, and they are not checked against the trustedCerts in the client configuration either.") 68 } 69 70 return cmd 71 }) 72 } 73 74 func (c *syncCmd) Describe() string { 75 return "Synchronize blobs from a source to a destination." 76 } 77 78 func (c *syncCmd) Usage() { 79 fmt.Fprintf(os.Stderr, "Usage: camtool [globalopts] sync [syncopts] \n") 80 } 81 82 func (c *syncCmd) Examples() []string { 83 return []string{ 84 "--all", 85 "--src http://localhost:3179/bs/ --dest http://localhost:3179/index-mem/", 86 } 87 } 88 89 func (c *syncCmd) RunCommand(args []string) error { 90 if c.loop && !c.removeSrc { 91 return cmdmain.UsageError("Can't use --loop without --removesrc") 92 } 93 if c.verbose { 94 c.logger = log.New(os.Stderr, "", 0) // else nil 95 } 96 if c.all { 97 err := c.syncAll() 98 if err != nil { 99 return fmt.Errorf("sync all failed: %v", err) 100 } 101 return nil 102 } 103 104 ss, err := c.storageFromParam("src", c.src) 105 if err != nil { 106 return err 107 } 108 ds, err := c.storageFromParam("dest", c.dest) 109 if err != nil { 110 return err 111 } 112 ts, err := c.storageFromParam("thirdleg", c.third) 113 if err != nil { 114 return err 115 } 116 117 passNum := 0 118 for { 119 passNum++ 120 stats, err := c.doPass(ss, ds, ts) 121 if c.verbose { 122 log.Printf("sync stats - pass: %d, blobs: %d, bytes %d\n", passNum, stats.BlobsCopied, stats.BytesCopied) 123 } 124 if err != nil { 125 return fmt.Errorf("sync failed: %v", err) 126 } 127 if !c.loop { 128 break 129 } 130 } 131 return nil 132 } 133 134 // A storageType is one of "src", "dest", or "thirdleg". These match the flag names. 135 type storageType string 136 137 const ( 138 storageSource storageType = "src" 139 storageDest storageType = "dest" 140 storageThird storageType = "thirdleg" 141 ) 142 143 // which is one of "src", "dest", or "thirdleg" 144 func (c *syncCmd) storageFromParam(which storageType, val string) (blobserver.Storage, error) { 145 if val == "" { 146 switch which { 147 case storageThird: 148 return nil, nil 149 case storageSource: 150 discl := c.discoClient() 151 discl.SetLogger(c.logger) 152 src, err := discl.BlobRoot() 153 if err != nil { 154 return nil, fmt.Errorf("Failed to discover source server's blob path: %v", err) 155 } 156 val = src 157 } 158 if val == "" { 159 return nil, cmdmain.UsageError("No --" + string(which) + " flag value specified") 160 } 161 } 162 if which == storageDest && val == "stdout" { 163 return nil, nil 164 } 165 if looksLikePath(val) { 166 disk, err := localdisk.New(val) 167 if err != nil { 168 return nil, fmt.Errorf("Interpreted --%v=%q as a local disk path, but got error: %v", which, val, err) 169 } 170 return disk, nil 171 } 172 cl := client.New(val) 173 cl.InsecureTLS = c.insecureTLS 174 cl.SetHTTPClient(&http.Client{ 175 Transport: cl.TransportForConfig(nil), 176 }) 177 cl.SetupAuth() 178 cl.SetLogger(c.logger) 179 return cl, nil 180 } 181 182 func looksLikePath(v string) bool { 183 prefix := func(s string) bool { return strings.HasPrefix(v, s) } 184 return prefix("./") || prefix("/") || prefix("../") 185 } 186 187 type SyncStats struct { 188 BlobsCopied int 189 BytesCopied int64 190 ErrorCount int 191 } 192 193 func (c *syncCmd) syncAll() error { 194 if c.loop { 195 return cmdmain.UsageError("--all can't be used with --loop") 196 } 197 if c.third != "" { 198 return cmdmain.UsageError("--all can't be used with --thirdleg") 199 } 200 if c.dest != "" { 201 return cmdmain.UsageError("--all can't be used with --dest") 202 } 203 204 dc := c.discoClient() 205 dc.SetLogger(c.logger) 206 syncHandlers, err := dc.SyncHandlers() 207 if err != nil { 208 return fmt.Errorf("sync handlers discovery failed: %v", err) 209 } 210 if c.verbose { 211 log.Printf("To be synced:\n") 212 for _, sh := range syncHandlers { 213 log.Printf("%v -> %v", sh.From, sh.To) 214 } 215 } 216 for _, sh := range syncHandlers { 217 from := client.New(sh.From) 218 from.SetLogger(c.logger) 219 from.InsecureTLS = c.insecureTLS 220 from.SetHTTPClient(&http.Client{ 221 Transport: from.TransportForConfig(nil), 222 }) 223 from.SetupAuth() 224 to := client.New(sh.To) 225 to.SetLogger(c.logger) 226 to.InsecureTLS = c.insecureTLS 227 to.SetHTTPClient(&http.Client{ 228 Transport: to.TransportForConfig(nil), 229 }) 230 to.SetupAuth() 231 if c.verbose { 232 log.Printf("Now syncing: %v -> %v", sh.From, sh.To) 233 } 234 stats, err := c.doPass(from, to, nil) 235 if c.verbose { 236 log.Printf("sync stats, blobs: %d, bytes %d\n", stats.BlobsCopied, stats.BytesCopied) 237 } 238 if err != nil { 239 return err 240 } 241 } 242 return nil 243 } 244 245 // discoClient returns a client initialized with a server 246 // based from --src or from the configuration file if --src 247 // is blank. The returned client can then be used to discover 248 // the blobRoot and syncHandlers. 249 func (c *syncCmd) discoClient() *client.Client { 250 var cl *client.Client 251 if c.src == "" { 252 cl = client.NewOrFail() 253 } else { 254 cl = client.New(c.src) 255 } 256 cl.SetLogger(c.logger) 257 cl.InsecureTLS = c.insecureTLS 258 cl.SetHTTPClient(&http.Client{ 259 Transport: cl.TransportForConfig(nil), 260 }) 261 cl.SetupAuth() 262 return cl 263 } 264 265 func enumerateAllBlobs(ctx *context.Context, s blobserver.Storage, destc chan<- blob.SizedRef) error { 266 // Use *client.Client's support for enumerating all blobs if 267 // possible, since it could probably do a better job knowing 268 // HTTP boundaries and such. 269 if c, ok := s.(*client.Client); ok { 270 return c.SimpleEnumerateBlobs(ctx, destc) 271 } 272 273 defer close(destc) 274 return blobserver.EnumerateAll(ctx, s, func(sb blob.SizedRef) error { 275 destc <- sb 276 return nil 277 }) 278 } 279 280 // src: non-nil source 281 // dest: non-nil destination 282 // thirdLeg: optional third-leg client. if not nil, anything on src 283 // but not on dest will instead be copied to thirdLeg, instead of 284 // directly to dest. (sneakernet mode, copying to a portable drive 285 // and transporting thirdLeg to dest) 286 func (c *syncCmd) doPass(src, dest, thirdLeg blobserver.Storage) (stats SyncStats, retErr error) { 287 srcBlobs := make(chan blob.SizedRef, 100) 288 destBlobs := make(chan blob.SizedRef, 100) 289 srcErr := make(chan error, 1) 290 destErr := make(chan error, 1) 291 292 ctx := context.TODO() 293 defer ctx.Cancel() 294 go func() { 295 srcErr <- enumerateAllBlobs(ctx, src, srcBlobs) 296 }() 297 checkSourceError := func() { 298 if err := <-srcErr; err != nil { 299 retErr = fmt.Errorf("Enumerate error from source: %v", err) 300 } 301 } 302 303 if c.dest == "stdout" { 304 for sb := range srcBlobs { 305 fmt.Printf("%s %d\n", sb.Ref, sb.Size) 306 } 307 checkSourceError() 308 return 309 } 310 311 if c.wipe { 312 // TODO(mpl): dest is a client. make it send a "wipe" request? 313 // upon reception its server then wipes itself if it is a wiper. 314 log.Print("Index wiping not yet supported.") 315 } 316 317 go func() { 318 destErr <- enumerateAllBlobs(ctx, dest, destBlobs) 319 }() 320 checkDestError := func() { 321 if err := <-destErr; err != nil { 322 retErr = errors.New(fmt.Sprintf("Enumerate error from destination: %v", err)) 323 } 324 } 325 326 destNotHaveBlobs := make(chan blob.SizedRef) 327 sizeMismatch := make(chan blob.Ref) 328 readSrcBlobs := srcBlobs 329 if c.verbose { 330 readSrcBlobs = loggingBlobRefChannel(srcBlobs) 331 } 332 mismatches := []blob.Ref{} 333 go client.ListMissingDestinationBlobs(destNotHaveBlobs, sizeMismatch, readSrcBlobs, destBlobs) 334 335 // Handle three-legged mode if tc is provided. 336 checkThirdError := func() {} // default nop 337 syncBlobs := destNotHaveBlobs 338 firstHopDest := dest 339 if thirdLeg != nil { 340 thirdBlobs := make(chan blob.SizedRef, 100) 341 thirdErr := make(chan error, 1) 342 go func() { 343 thirdErr <- enumerateAllBlobs(ctx, thirdLeg, thirdBlobs) 344 }() 345 checkThirdError = func() { 346 if err := <-thirdErr; err != nil { 347 retErr = fmt.Errorf("Enumerate error from third leg: %v", err) 348 } 349 } 350 thirdNeedBlobs := make(chan blob.SizedRef) 351 go client.ListMissingDestinationBlobs(thirdNeedBlobs, sizeMismatch, destNotHaveBlobs, thirdBlobs) 352 syncBlobs = thirdNeedBlobs 353 firstHopDest = thirdLeg 354 } 355 For: 356 for { 357 select { 358 case br := <-sizeMismatch: 359 // TODO(bradfitz): check both sides and repair, carefully. For now, fail. 360 log.Printf("WARNING: blobref %v has differing sizes on source and dest", br) 361 stats.ErrorCount++ 362 mismatches = append(mismatches, br) 363 case sb, ok := <-syncBlobs: 364 if !ok { 365 break For 366 } 367 fmt.Printf("Destination needs blob: %s\n", sb) 368 369 blobReader, size, err := src.FetchStreaming(sb.Ref) 370 if err != nil { 371 stats.ErrorCount++ 372 log.Printf("Error fetching %s: %v", sb.Ref, err) 373 continue 374 } 375 if size != sb.Size { 376 stats.ErrorCount++ 377 log.Printf("Source blobserver's enumerate size of %d for blob %s doesn't match its Get size of %d", 378 sb.Size, sb.Ref, size) 379 continue 380 } 381 382 if _, err := blobserver.Receive(firstHopDest, sb.Ref, blobReader); err != nil { 383 stats.ErrorCount++ 384 log.Printf("Upload of %s to destination blobserver failed: %v", sb.Ref, err) 385 continue 386 } 387 stats.BlobsCopied++ 388 stats.BytesCopied += size 389 390 if c.removeSrc { 391 if err = src.RemoveBlobs([]blob.Ref{sb.Ref}); err != nil { 392 stats.ErrorCount++ 393 log.Printf("Failed to delete %s from source: %v", sb.Ref, err) 394 } 395 } 396 } 397 } 398 399 checkSourceError() 400 checkDestError() 401 checkThirdError() 402 if retErr == nil && stats.ErrorCount > 0 { 403 retErr = fmt.Errorf("%d errors during sync", stats.ErrorCount) 404 } 405 return stats, retErr 406 } 407 408 func loggingBlobRefChannel(ch <-chan blob.SizedRef) chan blob.SizedRef { 409 ch2 := make(chan blob.SizedRef) 410 go func() { 411 defer close(ch2) 412 var last time.Time 413 var nblob, nbyte int64 414 for v := range ch { 415 ch2 <- v 416 nblob++ 417 nbyte += v.Size 418 now := time.Now() 419 if last.IsZero() || now.After(last.Add(1*time.Second)) { 420 last = now 421 log.Printf("At source blob %v (%d blobs, %d bytes)", v.Ref, nblob, nbyte) 422 } 423 } 424 log.Printf("Total blobs: %d, %d bytes", nblob, nbyte) 425 }() 426 return ch2 427 }