github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/cmd/camtool/sync.go (about) 1 /* 2 Copyright 2013 The Camlistore Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package main 18 19 import ( 20 "flag" 21 "fmt" 22 "log" 23 "net/http" 24 "os" 25 "strconv" 26 "strings" 27 "time" 28 29 "camlistore.org/pkg/blob" 30 "camlistore.org/pkg/blobserver" 31 "camlistore.org/pkg/blobserver/localdisk" 32 "camlistore.org/pkg/client" 33 "camlistore.org/pkg/cmdmain" 34 "camlistore.org/pkg/context" 35 ) 36 37 type syncCmd struct { 38 src string 39 dest string 40 third string 41 srcKeyID string // GPG public key ID of the source server, if supported. 42 destKeyID string // GPG public key ID of the destination server, if supported. 43 44 loop bool 45 verbose bool 46 all bool 47 removeSrc bool 48 wipe bool 49 insecureTLS bool 50 oneIsDisk bool // Whether one of src or dest is a local disk. 51 52 logger *log.Logger 53 } 54 55 func init() { 56 cmdmain.RegisterCommand("sync", func(flags *flag.FlagSet) cmdmain.CommandRunner { 57 cmd := new(syncCmd) 58 flags.StringVar(&cmd.src, "src", "", "Source blobserver. "+serverFlagHelp) 59 flags.StringVar(&cmd.dest, "dest", "", "Destination blobserver (same format as src), or 'stdout' to just enumerate the --src blobs to stdout.") 60 flags.StringVar(&cmd.third, "thirdleg", "", "Copy blobs present in source but missing from destination to this 'third leg' blob store, instead of the destination. (same format as src)") 61 62 flags.BoolVar(&cmd.loop, "loop", false, "Create an associate a new permanode for the uploaded file or directory.") 63 flags.BoolVar(&cmd.verbose, "verbose", false, "Be verbose.") 64 flags.BoolVar(&cmd.wipe, "wipe", false, "If dest is an index, drop it and repopulate it from scratch. NOOP for now.") 65 flags.BoolVar(&cmd.all, "all", false, "Discover all sync destinations configured on the source server and run them.") 66 flags.BoolVar(&cmd.removeSrc, "removesrc", false, "Remove each blob from the source after syncing to the destination; for queue processing.") 67 // TODO(mpl): maybe move this flag up to the client pkg as an AddFlag, as it can be used by all commands. 68 if debug, _ := strconv.ParseBool(os.Getenv("CAMLI_DEBUG")); debug { 69 flags.BoolVar(&cmd.insecureTLS, "insecure", false, "If set, when using TLS, the server's certificates verification is disabled, and they are not checked against the trustedCerts in the client configuration either.") 70 } 71 72 return cmd 73 }) 74 } 75 76 func (c *syncCmd) Describe() string { 77 return "Synchronize blobs from a source to a destination." 78 } 79 80 func (c *syncCmd) Usage() { 81 fmt.Fprintf(cmdmain.Stderr, "Usage: camtool [globalopts] sync [syncopts] \n") 82 } 83 84 func (c *syncCmd) Examples() []string { 85 return []string{ 86 "--all", 87 "--src http://localhost:3179/bs/ --dest http://localhost:3179/index-mem/", 88 } 89 } 90 91 func (c *syncCmd) RunCommand(args []string) error { 92 if c.loop && !c.removeSrc { 93 return cmdmain.UsageError("Can't use --loop without --removesrc") 94 } 95 if c.verbose { 96 c.logger = log.New(cmdmain.Stderr, "", 0) // else nil 97 } 98 if c.all { 99 err := c.syncAll() 100 if err != nil { 101 return fmt.Errorf("sync all failed: %v", err) 102 } 103 return nil 104 } 105 106 ss, err := c.storageFromParam("src", c.src) 107 if err != nil { 108 return err 109 } 110 ds, err := c.storageFromParam("dest", c.dest) 111 if err != nil { 112 return err 113 } 114 ts, err := c.storageFromParam("thirdleg", c.third) 115 if err != nil { 116 return err 117 } 118 119 differentKeyIDs := fmt.Sprintf("WARNING: the source server GPG key ID (%v) and the destination's (%v) differ. All blobs will be synced, but because the indexer at the other side is indexing claims by a different user, you may not see what you expect in that server's web UI, etc.", c.srcKeyID, c.destKeyID) 120 121 if c.dest != "stdout" && !c.oneIsDisk && c.srcKeyID != c.destKeyID { // both blank is ok. 122 // Warn at the top (and hope the user sees it and can abort if it was a mistake): 123 fmt.Fprintln(cmdmain.Stderr, differentKeyIDs) 124 // Warn also at the end (in case the user missed the first one) 125 defer fmt.Fprintln(cmdmain.Stderr, differentKeyIDs) 126 } 127 128 passNum := 0 129 for { 130 passNum++ 131 stats, err := c.doPass(ss, ds, ts) 132 if c.verbose { 133 log.Printf("sync stats - pass: %d, blobs: %d, bytes %d\n", passNum, stats.BlobsCopied, stats.BytesCopied) 134 } 135 if err != nil { 136 return fmt.Errorf("sync failed: %v", err) 137 } 138 if !c.loop { 139 break 140 } 141 } 142 return nil 143 } 144 145 // A storageType is one of "src", "dest", or "thirdleg". These match the flag names. 146 type storageType string 147 148 const ( 149 storageSource storageType = "src" 150 storageDest storageType = "dest" 151 storageThird storageType = "thirdleg" 152 ) 153 154 // which is one of "src", "dest", or "thirdleg" 155 func (c *syncCmd) storageFromParam(which storageType, val string) (blobserver.Storage, error) { 156 var httpClient *http.Client 157 158 if val == "" { 159 switch which { 160 case storageThird: 161 return nil, nil 162 case storageSource: 163 discl := c.discoClient() 164 discl.SetLogger(c.logger) 165 src, err := discl.BlobRoot() 166 if err != nil { 167 return nil, fmt.Errorf("Failed to discover source server's blob path: %v", err) 168 } 169 val = src 170 httpClient = discl.HTTPClient() 171 } 172 if val == "" { 173 return nil, cmdmain.UsageError("No --" + string(which) + " flag value specified") 174 } 175 } 176 if which == storageDest && val == "stdout" { 177 return nil, nil 178 } 179 if looksLikePath(val) { 180 disk, err := localdisk.New(val) 181 if err != nil { 182 return nil, fmt.Errorf("Interpreted --%v=%q as a local disk path, but got error: %v", which, val, err) 183 } 184 c.oneIsDisk = true 185 return disk, nil 186 } 187 cl := client.New(val) 188 cl.InsecureTLS = c.insecureTLS 189 if httpClient == nil { 190 httpClient = &http.Client{ 191 Transport: cl.TransportForConfig(nil), 192 } 193 } 194 cl.SetHTTPClient(httpClient) 195 if err := cl.SetupAuth(); err != nil { 196 return nil, fmt.Errorf("could not setup auth for connecting to %v: %v", val, err) 197 } 198 cl.SetLogger(c.logger) 199 serverKeyID, err := cl.ServerKeyID() 200 if err != nil && err != client.ErrNoSigning { 201 fmt.Fprintf(cmdmain.Stderr, "Failed to discover keyId for server %v: %v", val, err) 202 } else { 203 if which == storageSource { 204 c.srcKeyID = serverKeyID 205 } else if which == storageDest { 206 c.destKeyID = serverKeyID 207 } 208 } 209 return cl, nil 210 } 211 212 func looksLikePath(v string) bool { 213 prefix := func(s string) bool { return strings.HasPrefix(v, s) } 214 return prefix("./") || prefix("/") || prefix("../") 215 } 216 217 type SyncStats struct { 218 BlobsCopied int 219 BytesCopied int64 220 ErrorCount int 221 } 222 223 func (c *syncCmd) syncAll() error { 224 if c.loop { 225 return cmdmain.UsageError("--all can't be used with --loop") 226 } 227 if c.third != "" { 228 return cmdmain.UsageError("--all can't be used with --thirdleg") 229 } 230 if c.dest != "" { 231 return cmdmain.UsageError("--all can't be used with --dest") 232 } 233 234 dc := c.discoClient() 235 dc.SetLogger(c.logger) 236 syncHandlers, err := dc.SyncHandlers() 237 if err != nil { 238 return fmt.Errorf("sync handlers discovery failed: %v", err) 239 } 240 if c.verbose { 241 log.Printf("To be synced:\n") 242 for _, sh := range syncHandlers { 243 log.Printf("%v -> %v", sh.From, sh.To) 244 } 245 } 246 for _, sh := range syncHandlers { 247 from := client.New(sh.From) 248 from.SetLogger(c.logger) 249 from.InsecureTLS = c.insecureTLS 250 from.SetHTTPClient(&http.Client{ 251 Transport: from.TransportForConfig(nil), 252 }) 253 if err := from.SetupAuth(); err != nil { 254 return fmt.Errorf("could not setup auth for connecting to %v: %v", sh.From, err) 255 } 256 to := client.New(sh.To) 257 to.SetLogger(c.logger) 258 to.InsecureTLS = c.insecureTLS 259 to.SetHTTPClient(&http.Client{ 260 Transport: to.TransportForConfig(nil), 261 }) 262 if err := to.SetupAuth(); err != nil { 263 return fmt.Errorf("could not setup auth for connecting to %v: %v", sh.To, err) 264 } 265 if c.verbose { 266 log.Printf("Now syncing: %v -> %v", sh.From, sh.To) 267 } 268 stats, err := c.doPass(from, to, nil) 269 if c.verbose { 270 log.Printf("sync stats, blobs: %d, bytes %d\n", stats.BlobsCopied, stats.BytesCopied) 271 } 272 if err != nil { 273 return err 274 } 275 } 276 return nil 277 } 278 279 // discoClient returns a client initialized with a server 280 // based from --src or from the configuration file if --src 281 // is blank. The returned client can then be used to discover 282 // the blobRoot and syncHandlers. 283 func (c *syncCmd) discoClient() *client.Client { 284 cl := newClient(c.src) 285 cl.SetLogger(c.logger) 286 cl.InsecureTLS = c.insecureTLS 287 return cl 288 } 289 290 func enumerateAllBlobs(ctx *context.Context, s blobserver.Storage, destc chan<- blob.SizedRef) error { 291 // Use *client.Client's support for enumerating all blobs if 292 // possible, since it could probably do a better job knowing 293 // HTTP boundaries and such. 294 if c, ok := s.(*client.Client); ok { 295 return c.SimpleEnumerateBlobs(ctx, destc) 296 } 297 298 defer close(destc) 299 return blobserver.EnumerateAll(ctx, s, func(sb blob.SizedRef) error { 300 select { 301 case destc <- sb: 302 case <-ctx.Done(): 303 return context.ErrCanceled 304 } 305 return nil 306 }) 307 } 308 309 // src: non-nil source 310 // dest: non-nil destination 311 // thirdLeg: optional third-leg client. if not nil, anything on src 312 // but not on dest will instead be copied to thirdLeg, instead of 313 // directly to dest. (sneakernet mode, copying to a portable drive 314 // and transporting thirdLeg to dest) 315 func (c *syncCmd) doPass(src, dest, thirdLeg blobserver.Storage) (stats SyncStats, retErr error) { 316 srcBlobs := make(chan blob.SizedRef, 100) 317 destBlobs := make(chan blob.SizedRef, 100) 318 srcErr := make(chan error, 1) 319 destErr := make(chan error, 1) 320 321 ctx := context.TODO() 322 enumCtx := ctx.New() // used for all (2 or 3) enumerates 323 defer enumCtx.Cancel() 324 enumerate := func(errc chan<- error, sto blobserver.Storage, blobc chan<- blob.SizedRef) { 325 err := enumerateAllBlobs(enumCtx, sto, blobc) 326 if err != nil { 327 enumCtx.Cancel() 328 } 329 errc <- err 330 } 331 332 go enumerate(srcErr, src, srcBlobs) 333 checkSourceError := func() { 334 if err := <-srcErr; err != nil && err != context.ErrCanceled { 335 retErr = fmt.Errorf("Enumerate error from source: %v", err) 336 } 337 } 338 339 if c.dest == "stdout" { 340 for sb := range srcBlobs { 341 fmt.Fprintf(cmdmain.Stdout, "%s %d\n", sb.Ref, sb.Size) 342 } 343 checkSourceError() 344 return 345 } 346 347 if c.wipe { 348 // TODO(mpl): dest is a client. make it send a "wipe" request? 349 // upon reception its server then wipes itself if it is a wiper. 350 log.Print("Index wiping not yet supported.") 351 } 352 353 go enumerate(destErr, dest, destBlobs) 354 checkDestError := func() { 355 if err := <-destErr; err != nil && err != context.ErrCanceled { 356 retErr = fmt.Errorf("Enumerate error from destination: %v", err) 357 } 358 } 359 360 destNotHaveBlobs := make(chan blob.SizedRef) 361 362 readSrcBlobs := srcBlobs 363 if c.verbose { 364 readSrcBlobs = loggingBlobRefChannel(srcBlobs) 365 } 366 367 mismatches := []blob.Ref{} 368 onMismatch := func(br blob.Ref) { 369 // TODO(bradfitz): check both sides and repair, carefully. For now, fail. 370 log.Printf("WARNING: blobref %v has differing sizes on source and dest", br) 371 stats.ErrorCount++ 372 mismatches = append(mismatches, br) 373 } 374 375 go blobserver.ListMissingDestinationBlobs(destNotHaveBlobs, onMismatch, readSrcBlobs, destBlobs) 376 377 // Handle three-legged mode if tc is provided. 378 checkThirdError := func() {} // default nop 379 syncBlobs := destNotHaveBlobs 380 firstHopDest := dest 381 if thirdLeg != nil { 382 thirdBlobs := make(chan blob.SizedRef, 100) 383 thirdErr := make(chan error, 1) 384 go enumerate(thirdErr, thirdLeg, thirdBlobs) 385 checkThirdError = func() { 386 if err := <-thirdErr; err != nil && err != context.ErrCanceled { 387 retErr = fmt.Errorf("Enumerate error from third leg: %v", err) 388 } 389 } 390 thirdNeedBlobs := make(chan blob.SizedRef) 391 go blobserver.ListMissingDestinationBlobs(thirdNeedBlobs, onMismatch, destNotHaveBlobs, thirdBlobs) 392 syncBlobs = thirdNeedBlobs 393 firstHopDest = thirdLeg 394 } 395 396 for sb := range syncBlobs { 397 fmt.Fprintf(cmdmain.Stdout, "Destination needs blob: %s\n", sb) 398 399 blobReader, size, err := src.Fetch(sb.Ref) 400 if err != nil { 401 stats.ErrorCount++ 402 log.Printf("Error fetching %s: %v", sb.Ref, err) 403 continue 404 } 405 if size != sb.Size { 406 stats.ErrorCount++ 407 log.Printf("Source blobserver's enumerate size of %d for blob %s doesn't match its Get size of %d", 408 sb.Size, sb.Ref, size) 409 continue 410 } 411 412 if _, err := blobserver.Receive(firstHopDest, sb.Ref, blobReader); err != nil { 413 stats.ErrorCount++ 414 log.Printf("Upload of %s to destination blobserver failed: %v", sb.Ref, err) 415 continue 416 } 417 stats.BlobsCopied++ 418 stats.BytesCopied += int64(size) 419 420 if c.removeSrc { 421 if err = src.RemoveBlobs([]blob.Ref{sb.Ref}); err != nil { 422 stats.ErrorCount++ 423 log.Printf("Failed to delete %s from source: %v", sb.Ref, err) 424 } 425 } 426 } 427 428 checkSourceError() 429 checkDestError() 430 checkThirdError() 431 if retErr == nil && stats.ErrorCount > 0 { 432 retErr = fmt.Errorf("%d errors during sync", stats.ErrorCount) 433 } 434 return stats, retErr 435 } 436 437 func loggingBlobRefChannel(ch <-chan blob.SizedRef) chan blob.SizedRef { 438 ch2 := make(chan blob.SizedRef) 439 go func() { 440 defer close(ch2) 441 var last time.Time 442 var nblob, nbyte int64 443 for v := range ch { 444 ch2 <- v 445 nblob++ 446 nbyte += int64(v.Size) 447 now := time.Now() 448 if last.IsZero() || now.After(last.Add(1*time.Second)) { 449 last = now 450 log.Printf("At source blob %v (%d blobs, %d bytes)", v.Ref, nblob, nbyte) 451 } 452 } 453 log.Printf("Total blobs: %d, %d bytes", nblob, nbyte) 454 }() 455 return ch2 456 }