github.com/olivere/camlistore@v0.0.0-20140121221811-1b7ac2da0199/cmd/camtool/sync.go (about)

     1  /*
     2  Copyright 2013 The Camlistore Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package main
    18  
    19  import (
    20  	"errors"
    21  	"flag"
    22  	"fmt"
    23  	"log"
    24  	"net/http"
    25  	"os"
    26  	"strconv"
    27  	"strings"
    28  	"time"
    29  
    30  	"camlistore.org/pkg/blob"
    31  	"camlistore.org/pkg/blobserver"
    32  	"camlistore.org/pkg/blobserver/localdisk"
    33  	"camlistore.org/pkg/client"
    34  	"camlistore.org/pkg/cmdmain"
    35  	"camlistore.org/pkg/context"
    36  )
    37  
    38  type syncCmd struct {
    39  	src   string
    40  	dest  string
    41  	third string
    42  
    43  	loop        bool
    44  	verbose     bool
    45  	all         bool
    46  	removeSrc   bool
    47  	wipe        bool
    48  	insecureTLS bool
    49  
    50  	logger *log.Logger
    51  }
    52  
    53  func init() {
    54  	cmdmain.RegisterCommand("sync", func(flags *flag.FlagSet) cmdmain.CommandRunner {
    55  		cmd := new(syncCmd)
    56  		flags.StringVar(&cmd.src, "src", "", "Source blobserver is either a URL prefix (with optional path), a host[:port], a path (starting with /, ./, or ../), or blank to use the Camlistore client config's default host.")
    57  		flags.StringVar(&cmd.dest, "dest", "", "Destination blobserver (same format as src), or 'stdout' to just enumerate the --src blobs to stdout.")
    58  		flags.StringVar(&cmd.third, "thirdleg", "", "Copy blobs present in source but missing from destination to this 'third leg' blob store, instead of the destination. (same format as src)")
    59  
    60  		flags.BoolVar(&cmd.loop, "loop", false, "Create an associate a new permanode for the uploaded file or directory.")
    61  		flags.BoolVar(&cmd.verbose, "verbose", false, "Be verbose.")
    62  		flags.BoolVar(&cmd.wipe, "wipe", false, "If dest is an index, drop it and repopulate it from scratch. NOOP for now.")
    63  		flags.BoolVar(&cmd.all, "all", false, "Discover all sync destinations configured on the source server and run them.")
    64  		flags.BoolVar(&cmd.removeSrc, "removesrc", false, "Remove each blob from the source after syncing to the destination; for queue processing.")
    65  		// TODO(mpl): maybe move this flag up to the client pkg as an AddFlag, as it can be used by all commands.
    66  		if debug, _ := strconv.ParseBool(os.Getenv("CAMLI_DEBUG")); debug {
    67  			flags.BoolVar(&cmd.insecureTLS, "insecure", false, "If set, when using TLS, the server's certificates verification is disabled, and they are not checked against the trustedCerts in the client configuration either.")
    68  		}
    69  
    70  		return cmd
    71  	})
    72  }
    73  
    74  func (c *syncCmd) Describe() string {
    75  	return "Synchronize blobs from a source to a destination."
    76  }
    77  
    78  func (c *syncCmd) Usage() {
    79  	fmt.Fprintf(os.Stderr, "Usage: camtool [globalopts] sync [syncopts] \n")
    80  }
    81  
    82  func (c *syncCmd) Examples() []string {
    83  	return []string{
    84  		"--all",
    85  		"--src http://localhost:3179/bs/ --dest http://localhost:3179/index-mem/",
    86  	}
    87  }
    88  
    89  func (c *syncCmd) RunCommand(args []string) error {
    90  	if c.loop && !c.removeSrc {
    91  		return cmdmain.UsageError("Can't use --loop without --removesrc")
    92  	}
    93  	if c.verbose {
    94  		c.logger = log.New(os.Stderr, "", 0) // else nil
    95  	}
    96  	if c.all {
    97  		err := c.syncAll()
    98  		if err != nil {
    99  			return fmt.Errorf("sync all failed: %v", err)
   100  		}
   101  		return nil
   102  	}
   103  
   104  	ss, err := c.storageFromParam("src", c.src)
   105  	if err != nil {
   106  		return err
   107  	}
   108  	ds, err := c.storageFromParam("dest", c.dest)
   109  	if err != nil {
   110  		return err
   111  	}
   112  	ts, err := c.storageFromParam("thirdleg", c.third)
   113  	if err != nil {
   114  		return err
   115  	}
   116  
   117  	passNum := 0
   118  	for {
   119  		passNum++
   120  		stats, err := c.doPass(ss, ds, ts)
   121  		if c.verbose {
   122  			log.Printf("sync stats - pass: %d, blobs: %d, bytes %d\n", passNum, stats.BlobsCopied, stats.BytesCopied)
   123  		}
   124  		if err != nil {
   125  			return fmt.Errorf("sync failed: %v", err)
   126  		}
   127  		if !c.loop {
   128  			break
   129  		}
   130  	}
   131  	return nil
   132  }
   133  
   134  // A storageType is one of "src", "dest", or "thirdleg". These match the flag names.
   135  type storageType string
   136  
   137  const (
   138  	storageSource storageType = "src"
   139  	storageDest   storageType = "dest"
   140  	storageThird  storageType = "thirdleg"
   141  )
   142  
   143  // which is one of "src", "dest", or "thirdleg"
   144  func (c *syncCmd) storageFromParam(which storageType, val string) (blobserver.Storage, error) {
   145  	if val == "" {
   146  		switch which {
   147  		case storageThird:
   148  			return nil, nil
   149  		case storageSource:
   150  			discl := c.discoClient()
   151  			discl.SetLogger(c.logger)
   152  			src, err := discl.BlobRoot()
   153  			if err != nil {
   154  				return nil, fmt.Errorf("Failed to discover source server's blob path: %v", err)
   155  			}
   156  			val = src
   157  		}
   158  		if val == "" {
   159  			return nil, cmdmain.UsageError("No --" + string(which) + " flag value specified")
   160  		}
   161  	}
   162  	if which == storageDest && val == "stdout" {
   163  		return nil, nil
   164  	}
   165  	if looksLikePath(val) {
   166  		disk, err := localdisk.New(val)
   167  		if err != nil {
   168  			return nil, fmt.Errorf("Interpreted --%v=%q as a local disk path, but got error: %v", which, val, err)
   169  		}
   170  		return disk, nil
   171  	}
   172  	cl := client.New(val)
   173  	cl.InsecureTLS = c.insecureTLS
   174  	cl.SetHTTPClient(&http.Client{
   175  		Transport: cl.TransportForConfig(nil),
   176  	})
   177  	cl.SetupAuth()
   178  	cl.SetLogger(c.logger)
   179  	return cl, nil
   180  }
   181  
   182  func looksLikePath(v string) bool {
   183  	prefix := func(s string) bool { return strings.HasPrefix(v, s) }
   184  	return prefix("./") || prefix("/") || prefix("../")
   185  }
   186  
   187  type SyncStats struct {
   188  	BlobsCopied int
   189  	BytesCopied int64
   190  	ErrorCount  int
   191  }
   192  
   193  func (c *syncCmd) syncAll() error {
   194  	if c.loop {
   195  		return cmdmain.UsageError("--all can't be used with --loop")
   196  	}
   197  	if c.third != "" {
   198  		return cmdmain.UsageError("--all can't be used with --thirdleg")
   199  	}
   200  	if c.dest != "" {
   201  		return cmdmain.UsageError("--all can't be used with --dest")
   202  	}
   203  
   204  	dc := c.discoClient()
   205  	dc.SetLogger(c.logger)
   206  	syncHandlers, err := dc.SyncHandlers()
   207  	if err != nil {
   208  		return fmt.Errorf("sync handlers discovery failed: %v", err)
   209  	}
   210  	if c.verbose {
   211  		log.Printf("To be synced:\n")
   212  		for _, sh := range syncHandlers {
   213  			log.Printf("%v -> %v", sh.From, sh.To)
   214  		}
   215  	}
   216  	for _, sh := range syncHandlers {
   217  		from := client.New(sh.From)
   218  		from.SetLogger(c.logger)
   219  		from.InsecureTLS = c.insecureTLS
   220  		from.SetHTTPClient(&http.Client{
   221  			Transport: from.TransportForConfig(nil),
   222  		})
   223  		from.SetupAuth()
   224  		to := client.New(sh.To)
   225  		to.SetLogger(c.logger)
   226  		to.InsecureTLS = c.insecureTLS
   227  		to.SetHTTPClient(&http.Client{
   228  			Transport: to.TransportForConfig(nil),
   229  		})
   230  		to.SetupAuth()
   231  		if c.verbose {
   232  			log.Printf("Now syncing: %v -> %v", sh.From, sh.To)
   233  		}
   234  		stats, err := c.doPass(from, to, nil)
   235  		if c.verbose {
   236  			log.Printf("sync stats, blobs: %d, bytes %d\n", stats.BlobsCopied, stats.BytesCopied)
   237  		}
   238  		if err != nil {
   239  			return err
   240  		}
   241  	}
   242  	return nil
   243  }
   244  
   245  // discoClient returns a client initialized with a server
   246  // based from --src or from the configuration file if --src
   247  // is blank. The returned client can then be used to discover
   248  // the blobRoot and syncHandlers.
   249  func (c *syncCmd) discoClient() *client.Client {
   250  	var cl *client.Client
   251  	if c.src == "" {
   252  		cl = client.NewOrFail()
   253  	} else {
   254  		cl = client.New(c.src)
   255  	}
   256  	cl.SetLogger(c.logger)
   257  	cl.InsecureTLS = c.insecureTLS
   258  	cl.SetHTTPClient(&http.Client{
   259  		Transport: cl.TransportForConfig(nil),
   260  	})
   261  	cl.SetupAuth()
   262  	return cl
   263  }
   264  
   265  func enumerateAllBlobs(ctx *context.Context, s blobserver.Storage, destc chan<- blob.SizedRef) error {
   266  	// Use *client.Client's support for enumerating all blobs if
   267  	// possible, since it could probably do a better job knowing
   268  	// HTTP boundaries and such.
   269  	if c, ok := s.(*client.Client); ok {
   270  		return c.SimpleEnumerateBlobs(ctx, destc)
   271  	}
   272  
   273  	defer close(destc)
   274  	return blobserver.EnumerateAll(ctx, s, func(sb blob.SizedRef) error {
   275  		destc <- sb
   276  		return nil
   277  	})
   278  }
   279  
   280  // src: non-nil source
   281  // dest: non-nil destination
   282  // thirdLeg: optional third-leg client. if not nil, anything on src
   283  //     but not on dest will instead be copied to thirdLeg, instead of
   284  //     directly to dest. (sneakernet mode, copying to a portable drive
   285  //     and transporting thirdLeg to dest)
   286  func (c *syncCmd) doPass(src, dest, thirdLeg blobserver.Storage) (stats SyncStats, retErr error) {
   287  	srcBlobs := make(chan blob.SizedRef, 100)
   288  	destBlobs := make(chan blob.SizedRef, 100)
   289  	srcErr := make(chan error, 1)
   290  	destErr := make(chan error, 1)
   291  
   292  	ctx := context.TODO()
   293  	defer ctx.Cancel()
   294  	go func() {
   295  		srcErr <- enumerateAllBlobs(ctx, src, srcBlobs)
   296  	}()
   297  	checkSourceError := func() {
   298  		if err := <-srcErr; err != nil {
   299  			retErr = fmt.Errorf("Enumerate error from source: %v", err)
   300  		}
   301  	}
   302  
   303  	if c.dest == "stdout" {
   304  		for sb := range srcBlobs {
   305  			fmt.Printf("%s %d\n", sb.Ref, sb.Size)
   306  		}
   307  		checkSourceError()
   308  		return
   309  	}
   310  
   311  	if c.wipe {
   312  		// TODO(mpl): dest is a client. make it send a "wipe" request?
   313  		// upon reception its server then wipes itself if it is a wiper.
   314  		log.Print("Index wiping not yet supported.")
   315  	}
   316  
   317  	go func() {
   318  		destErr <- enumerateAllBlobs(ctx, dest, destBlobs)
   319  	}()
   320  	checkDestError := func() {
   321  		if err := <-destErr; err != nil {
   322  			retErr = errors.New(fmt.Sprintf("Enumerate error from destination: %v", err))
   323  		}
   324  	}
   325  
   326  	destNotHaveBlobs := make(chan blob.SizedRef)
   327  	sizeMismatch := make(chan blob.Ref)
   328  	readSrcBlobs := srcBlobs
   329  	if c.verbose {
   330  		readSrcBlobs = loggingBlobRefChannel(srcBlobs)
   331  	}
   332  	mismatches := []blob.Ref{}
   333  	go client.ListMissingDestinationBlobs(destNotHaveBlobs, sizeMismatch, readSrcBlobs, destBlobs)
   334  
   335  	// Handle three-legged mode if tc is provided.
   336  	checkThirdError := func() {} // default nop
   337  	syncBlobs := destNotHaveBlobs
   338  	firstHopDest := dest
   339  	if thirdLeg != nil {
   340  		thirdBlobs := make(chan blob.SizedRef, 100)
   341  		thirdErr := make(chan error, 1)
   342  		go func() {
   343  			thirdErr <- enumerateAllBlobs(ctx, thirdLeg, thirdBlobs)
   344  		}()
   345  		checkThirdError = func() {
   346  			if err := <-thirdErr; err != nil {
   347  				retErr = fmt.Errorf("Enumerate error from third leg: %v", err)
   348  			}
   349  		}
   350  		thirdNeedBlobs := make(chan blob.SizedRef)
   351  		go client.ListMissingDestinationBlobs(thirdNeedBlobs, sizeMismatch, destNotHaveBlobs, thirdBlobs)
   352  		syncBlobs = thirdNeedBlobs
   353  		firstHopDest = thirdLeg
   354  	}
   355  For:
   356  	for {
   357  		select {
   358  		case br := <-sizeMismatch:
   359  			// TODO(bradfitz): check both sides and repair, carefully.  For now, fail.
   360  			log.Printf("WARNING: blobref %v has differing sizes on source and dest", br)
   361  			stats.ErrorCount++
   362  			mismatches = append(mismatches, br)
   363  		case sb, ok := <-syncBlobs:
   364  			if !ok {
   365  				break For
   366  			}
   367  			fmt.Printf("Destination needs blob: %s\n", sb)
   368  
   369  			blobReader, size, err := src.FetchStreaming(sb.Ref)
   370  			if err != nil {
   371  				stats.ErrorCount++
   372  				log.Printf("Error fetching %s: %v", sb.Ref, err)
   373  				continue
   374  			}
   375  			if size != sb.Size {
   376  				stats.ErrorCount++
   377  				log.Printf("Source blobserver's enumerate size of %d for blob %s doesn't match its Get size of %d",
   378  					sb.Size, sb.Ref, size)
   379  				continue
   380  			}
   381  
   382  			if _, err := blobserver.Receive(firstHopDest, sb.Ref, blobReader); err != nil {
   383  				stats.ErrorCount++
   384  				log.Printf("Upload of %s to destination blobserver failed: %v", sb.Ref, err)
   385  				continue
   386  			}
   387  			stats.BlobsCopied++
   388  			stats.BytesCopied += size
   389  
   390  			if c.removeSrc {
   391  				if err = src.RemoveBlobs([]blob.Ref{sb.Ref}); err != nil {
   392  					stats.ErrorCount++
   393  					log.Printf("Failed to delete %s from source: %v", sb.Ref, err)
   394  				}
   395  			}
   396  		}
   397  	}
   398  
   399  	checkSourceError()
   400  	checkDestError()
   401  	checkThirdError()
   402  	if retErr == nil && stats.ErrorCount > 0 {
   403  		retErr = fmt.Errorf("%d errors during sync", stats.ErrorCount)
   404  	}
   405  	return stats, retErr
   406  }
   407  
   408  func loggingBlobRefChannel(ch <-chan blob.SizedRef) chan blob.SizedRef {
   409  	ch2 := make(chan blob.SizedRef)
   410  	go func() {
   411  		defer close(ch2)
   412  		var last time.Time
   413  		var nblob, nbyte int64
   414  		for v := range ch {
   415  			ch2 <- v
   416  			nblob++
   417  			nbyte += v.Size
   418  			now := time.Now()
   419  			if last.IsZero() || now.After(last.Add(1*time.Second)) {
   420  				last = now
   421  				log.Printf("At source blob %v (%d blobs, %d bytes)", v.Ref, nblob, nbyte)
   422  			}
   423  		}
   424  		log.Printf("Total blobs: %d, %d bytes", nblob, nbyte)
   425  	}()
   426  	return ch2
   427  }