github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/cmd/camtool/sync.go (about)

     1  /*
     2  Copyright 2013 The Camlistore Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package main
    18  
    19  import (
    20  	"flag"
    21  	"fmt"
    22  	"log"
    23  	"net/http"
    24  	"os"
    25  	"strconv"
    26  	"strings"
    27  	"time"
    28  
    29  	"camlistore.org/pkg/blob"
    30  	"camlistore.org/pkg/blobserver"
    31  	"camlistore.org/pkg/blobserver/localdisk"
    32  	"camlistore.org/pkg/client"
    33  	"camlistore.org/pkg/cmdmain"
    34  	"camlistore.org/pkg/context"
    35  )
    36  
// syncCmd holds the flag values and derived state for the "sync" subcommand.
type syncCmd struct {
	src       string // value of --src: the source blobserver
	dest      string // value of --dest: the destination blobserver, or "stdout"
	third     string // value of --thirdleg: optional store that receives blobs missing from dest
	srcKeyID  string // GPG public key ID of the source server, if supported.
	destKeyID string // GPG public key ID of the destination server, if supported.

	loop        bool // --loop: keep running passes; RunCommand requires --removesrc with it
	verbose     bool // --verbose: enables the logger and per-pass stats output
	all         bool // --all: run every sync handler discovered on the source server
	removeSrc   bool // --removesrc: delete each blob from the source once copied
	wipe        bool // --wipe: currently only logs that wiping is unsupported
	insecureTLS bool // --insecure: only registered as a flag when CAMLI_DEBUG is true
	oneIsDisk   bool // Whether one of src or dest is a local disk.

	logger *log.Logger // set by RunCommand when verbose; nil otherwise
}
    54  
    55  func init() {
    56  	cmdmain.RegisterCommand("sync", func(flags *flag.FlagSet) cmdmain.CommandRunner {
    57  		cmd := new(syncCmd)
    58  		flags.StringVar(&cmd.src, "src", "", "Source blobserver. "+serverFlagHelp)
    59  		flags.StringVar(&cmd.dest, "dest", "", "Destination blobserver (same format as src), or 'stdout' to just enumerate the --src blobs to stdout.")
    60  		flags.StringVar(&cmd.third, "thirdleg", "", "Copy blobs present in source but missing from destination to this 'third leg' blob store, instead of the destination. (same format as src)")
    61  
    62  		flags.BoolVar(&cmd.loop, "loop", false, "Create an associate a new permanode for the uploaded file or directory.")
    63  		flags.BoolVar(&cmd.verbose, "verbose", false, "Be verbose.")
    64  		flags.BoolVar(&cmd.wipe, "wipe", false, "If dest is an index, drop it and repopulate it from scratch. NOOP for now.")
    65  		flags.BoolVar(&cmd.all, "all", false, "Discover all sync destinations configured on the source server and run them.")
    66  		flags.BoolVar(&cmd.removeSrc, "removesrc", false, "Remove each blob from the source after syncing to the destination; for queue processing.")
    67  		// TODO(mpl): maybe move this flag up to the client pkg as an AddFlag, as it can be used by all commands.
    68  		if debug, _ := strconv.ParseBool(os.Getenv("CAMLI_DEBUG")); debug {
    69  			flags.BoolVar(&cmd.insecureTLS, "insecure", false, "If set, when using TLS, the server's certificates verification is disabled, and they are not checked against the trustedCerts in the client configuration either.")
    70  		}
    71  
    72  		return cmd
    73  	})
    74  }
    75  
    76  func (c *syncCmd) Describe() string {
    77  	return "Synchronize blobs from a source to a destination."
    78  }
    79  
    80  func (c *syncCmd) Usage() {
    81  	fmt.Fprintf(cmdmain.Stderr, "Usage: camtool [globalopts] sync [syncopts] \n")
    82  }
    83  
    84  func (c *syncCmd) Examples() []string {
    85  	return []string{
    86  		"--all",
    87  		"--src http://localhost:3179/bs/ --dest http://localhost:3179/index-mem/",
    88  	}
    89  }
    90  
    91  func (c *syncCmd) RunCommand(args []string) error {
    92  	if c.loop && !c.removeSrc {
    93  		return cmdmain.UsageError("Can't use --loop without --removesrc")
    94  	}
    95  	if c.verbose {
    96  		c.logger = log.New(cmdmain.Stderr, "", 0) // else nil
    97  	}
    98  	if c.all {
    99  		err := c.syncAll()
   100  		if err != nil {
   101  			return fmt.Errorf("sync all failed: %v", err)
   102  		}
   103  		return nil
   104  	}
   105  
   106  	ss, err := c.storageFromParam("src", c.src)
   107  	if err != nil {
   108  		return err
   109  	}
   110  	ds, err := c.storageFromParam("dest", c.dest)
   111  	if err != nil {
   112  		return err
   113  	}
   114  	ts, err := c.storageFromParam("thirdleg", c.third)
   115  	if err != nil {
   116  		return err
   117  	}
   118  
   119  	differentKeyIDs := fmt.Sprintf("WARNING: the source server GPG key ID (%v) and the destination's (%v) differ. All blobs will be synced, but because the indexer at the other side is indexing claims by a different user, you may not see what you expect in that server's web UI, etc.", c.srcKeyID, c.destKeyID)
   120  
   121  	if c.dest != "stdout" && !c.oneIsDisk && c.srcKeyID != c.destKeyID { // both blank is ok.
   122  		// Warn at the top (and hope the user sees it and can abort if it was a mistake):
   123  		fmt.Fprintln(cmdmain.Stderr, differentKeyIDs)
   124  		// Warn also at the end (in case the user missed the first one)
   125  		defer fmt.Fprintln(cmdmain.Stderr, differentKeyIDs)
   126  	}
   127  
   128  	passNum := 0
   129  	for {
   130  		passNum++
   131  		stats, err := c.doPass(ss, ds, ts)
   132  		if c.verbose {
   133  			log.Printf("sync stats - pass: %d, blobs: %d, bytes %d\n", passNum, stats.BlobsCopied, stats.BytesCopied)
   134  		}
   135  		if err != nil {
   136  			return fmt.Errorf("sync failed: %v", err)
   137  		}
   138  		if !c.loop {
   139  			break
   140  		}
   141  	}
   142  	return nil
   143  }
   144  
// A storageType is one of "src", "dest", or "thirdleg". These match the flag names.
// storageFromParam uses the value to pick per-role behavior and to name the
// flag in its error messages.
type storageType string

const (
	storageSource storageType = "src"      // the --src storage
	storageDest   storageType = "dest"     // the --dest storage
	storageThird  storageType = "thirdleg" // the optional --thirdleg storage
)
   153  
   154  // which is one of "src", "dest", or "thirdleg"
   155  func (c *syncCmd) storageFromParam(which storageType, val string) (blobserver.Storage, error) {
   156  	var httpClient *http.Client
   157  
   158  	if val == "" {
   159  		switch which {
   160  		case storageThird:
   161  			return nil, nil
   162  		case storageSource:
   163  			discl := c.discoClient()
   164  			discl.SetLogger(c.logger)
   165  			src, err := discl.BlobRoot()
   166  			if err != nil {
   167  				return nil, fmt.Errorf("Failed to discover source server's blob path: %v", err)
   168  			}
   169  			val = src
   170  			httpClient = discl.HTTPClient()
   171  		}
   172  		if val == "" {
   173  			return nil, cmdmain.UsageError("No --" + string(which) + " flag value specified")
   174  		}
   175  	}
   176  	if which == storageDest && val == "stdout" {
   177  		return nil, nil
   178  	}
   179  	if looksLikePath(val) {
   180  		disk, err := localdisk.New(val)
   181  		if err != nil {
   182  			return nil, fmt.Errorf("Interpreted --%v=%q as a local disk path, but got error: %v", which, val, err)
   183  		}
   184  		c.oneIsDisk = true
   185  		return disk, nil
   186  	}
   187  	cl := client.New(val)
   188  	cl.InsecureTLS = c.insecureTLS
   189  	if httpClient == nil {
   190  		httpClient = &http.Client{
   191  			Transport: cl.TransportForConfig(nil),
   192  		}
   193  	}
   194  	cl.SetHTTPClient(httpClient)
   195  	if err := cl.SetupAuth(); err != nil {
   196  		return nil, fmt.Errorf("could not setup auth for connecting to %v: %v", val, err)
   197  	}
   198  	cl.SetLogger(c.logger)
   199  	serverKeyID, err := cl.ServerKeyID()
   200  	if err != nil && err != client.ErrNoSigning {
   201  		fmt.Fprintf(cmdmain.Stderr, "Failed to discover keyId for server %v: %v", val, err)
   202  	} else {
   203  		if which == storageSource {
   204  			c.srcKeyID = serverKeyID
   205  		} else if which == storageDest {
   206  			c.destKeyID = serverKeyID
   207  		}
   208  	}
   209  	return cl, nil
   210  }
   211  
   212  func looksLikePath(v string) bool {
   213  	prefix := func(s string) bool { return strings.HasPrefix(v, s) }
   214  	return prefix("./") || prefix("/") || prefix("../")
   215  }
   216  
// SyncStats holds the counters accumulated by a single doPass run.
type SyncStats struct {
	BlobsCopied int   // blobs successfully received by the first-hop destination
	BytesCopied int64 // sum of the fetched sizes of those blobs
	ErrorCount  int   // fetch, size-mismatch, upload and source-removal failures
}
   222  
   223  func (c *syncCmd) syncAll() error {
   224  	if c.loop {
   225  		return cmdmain.UsageError("--all can't be used with --loop")
   226  	}
   227  	if c.third != "" {
   228  		return cmdmain.UsageError("--all can't be used with --thirdleg")
   229  	}
   230  	if c.dest != "" {
   231  		return cmdmain.UsageError("--all can't be used with --dest")
   232  	}
   233  
   234  	dc := c.discoClient()
   235  	dc.SetLogger(c.logger)
   236  	syncHandlers, err := dc.SyncHandlers()
   237  	if err != nil {
   238  		return fmt.Errorf("sync handlers discovery failed: %v", err)
   239  	}
   240  	if c.verbose {
   241  		log.Printf("To be synced:\n")
   242  		for _, sh := range syncHandlers {
   243  			log.Printf("%v -> %v", sh.From, sh.To)
   244  		}
   245  	}
   246  	for _, sh := range syncHandlers {
   247  		from := client.New(sh.From)
   248  		from.SetLogger(c.logger)
   249  		from.InsecureTLS = c.insecureTLS
   250  		from.SetHTTPClient(&http.Client{
   251  			Transport: from.TransportForConfig(nil),
   252  		})
   253  		if err := from.SetupAuth(); err != nil {
   254  			return fmt.Errorf("could not setup auth for connecting to %v: %v", sh.From, err)
   255  		}
   256  		to := client.New(sh.To)
   257  		to.SetLogger(c.logger)
   258  		to.InsecureTLS = c.insecureTLS
   259  		to.SetHTTPClient(&http.Client{
   260  			Transport: to.TransportForConfig(nil),
   261  		})
   262  		if err := to.SetupAuth(); err != nil {
   263  			return fmt.Errorf("could not setup auth for connecting to %v: %v", sh.To, err)
   264  		}
   265  		if c.verbose {
   266  			log.Printf("Now syncing: %v -> %v", sh.From, sh.To)
   267  		}
   268  		stats, err := c.doPass(from, to, nil)
   269  		if c.verbose {
   270  			log.Printf("sync stats, blobs: %d, bytes %d\n", stats.BlobsCopied, stats.BytesCopied)
   271  		}
   272  		if err != nil {
   273  			return err
   274  		}
   275  	}
   276  	return nil
   277  }
   278  
   279  // discoClient returns a client initialized with a server
   280  // based from --src or from the configuration file if --src
   281  // is blank. The returned client can then be used to discover
   282  // the blobRoot and syncHandlers.
   283  func (c *syncCmd) discoClient() *client.Client {
   284  	cl := newClient(c.src)
   285  	cl.SetLogger(c.logger)
   286  	cl.InsecureTLS = c.insecureTLS
   287  	return cl
   288  }
   289  
   290  func enumerateAllBlobs(ctx *context.Context, s blobserver.Storage, destc chan<- blob.SizedRef) error {
   291  	// Use *client.Client's support for enumerating all blobs if
   292  	// possible, since it could probably do a better job knowing
   293  	// HTTP boundaries and such.
   294  	if c, ok := s.(*client.Client); ok {
   295  		return c.SimpleEnumerateBlobs(ctx, destc)
   296  	}
   297  
   298  	defer close(destc)
   299  	return blobserver.EnumerateAll(ctx, s, func(sb blob.SizedRef) error {
   300  		select {
   301  		case destc <- sb:
   302  		case <-ctx.Done():
   303  			return context.ErrCanceled
   304  		}
   305  		return nil
   306  	})
   307  }
   308  
   309  // src: non-nil source
   310  // dest: non-nil destination
   311  // thirdLeg: optional third-leg client. if not nil, anything on src
   312  //     but not on dest will instead be copied to thirdLeg, instead of
   313  //     directly to dest. (sneakernet mode, copying to a portable drive
   314  //     and transporting thirdLeg to dest)
   315  func (c *syncCmd) doPass(src, dest, thirdLeg blobserver.Storage) (stats SyncStats, retErr error) {
   316  	srcBlobs := make(chan blob.SizedRef, 100)
   317  	destBlobs := make(chan blob.SizedRef, 100)
   318  	srcErr := make(chan error, 1)
   319  	destErr := make(chan error, 1)
   320  
   321  	ctx := context.TODO()
   322  	enumCtx := ctx.New() // used for all (2 or 3) enumerates
   323  	defer enumCtx.Cancel()
   324  	enumerate := func(errc chan<- error, sto blobserver.Storage, blobc chan<- blob.SizedRef) {
   325  		err := enumerateAllBlobs(enumCtx, sto, blobc)
   326  		if err != nil {
   327  			enumCtx.Cancel()
   328  		}
   329  		errc <- err
   330  	}
   331  
   332  	go enumerate(srcErr, src, srcBlobs)
   333  	checkSourceError := func() {
   334  		if err := <-srcErr; err != nil && err != context.ErrCanceled {
   335  			retErr = fmt.Errorf("Enumerate error from source: %v", err)
   336  		}
   337  	}
   338  
   339  	if c.dest == "stdout" {
   340  		for sb := range srcBlobs {
   341  			fmt.Fprintf(cmdmain.Stdout, "%s %d\n", sb.Ref, sb.Size)
   342  		}
   343  		checkSourceError()
   344  		return
   345  	}
   346  
   347  	if c.wipe {
   348  		// TODO(mpl): dest is a client. make it send a "wipe" request?
   349  		// upon reception its server then wipes itself if it is a wiper.
   350  		log.Print("Index wiping not yet supported.")
   351  	}
   352  
   353  	go enumerate(destErr, dest, destBlobs)
   354  	checkDestError := func() {
   355  		if err := <-destErr; err != nil && err != context.ErrCanceled {
   356  			retErr = fmt.Errorf("Enumerate error from destination: %v", err)
   357  		}
   358  	}
   359  
   360  	destNotHaveBlobs := make(chan blob.SizedRef)
   361  
   362  	readSrcBlobs := srcBlobs
   363  	if c.verbose {
   364  		readSrcBlobs = loggingBlobRefChannel(srcBlobs)
   365  	}
   366  
   367  	mismatches := []blob.Ref{}
   368  	onMismatch := func(br blob.Ref) {
   369  		// TODO(bradfitz): check both sides and repair, carefully.  For now, fail.
   370  		log.Printf("WARNING: blobref %v has differing sizes on source and dest", br)
   371  		stats.ErrorCount++
   372  		mismatches = append(mismatches, br)
   373  	}
   374  
   375  	go blobserver.ListMissingDestinationBlobs(destNotHaveBlobs, onMismatch, readSrcBlobs, destBlobs)
   376  
   377  	// Handle three-legged mode if tc is provided.
   378  	checkThirdError := func() {} // default nop
   379  	syncBlobs := destNotHaveBlobs
   380  	firstHopDest := dest
   381  	if thirdLeg != nil {
   382  		thirdBlobs := make(chan blob.SizedRef, 100)
   383  		thirdErr := make(chan error, 1)
   384  		go enumerate(thirdErr, thirdLeg, thirdBlobs)
   385  		checkThirdError = func() {
   386  			if err := <-thirdErr; err != nil && err != context.ErrCanceled {
   387  				retErr = fmt.Errorf("Enumerate error from third leg: %v", err)
   388  			}
   389  		}
   390  		thirdNeedBlobs := make(chan blob.SizedRef)
   391  		go blobserver.ListMissingDestinationBlobs(thirdNeedBlobs, onMismatch, destNotHaveBlobs, thirdBlobs)
   392  		syncBlobs = thirdNeedBlobs
   393  		firstHopDest = thirdLeg
   394  	}
   395  
   396  	for sb := range syncBlobs {
   397  		fmt.Fprintf(cmdmain.Stdout, "Destination needs blob: %s\n", sb)
   398  
   399  		blobReader, size, err := src.Fetch(sb.Ref)
   400  		if err != nil {
   401  			stats.ErrorCount++
   402  			log.Printf("Error fetching %s: %v", sb.Ref, err)
   403  			continue
   404  		}
   405  		if size != sb.Size {
   406  			stats.ErrorCount++
   407  			log.Printf("Source blobserver's enumerate size of %d for blob %s doesn't match its Get size of %d",
   408  				sb.Size, sb.Ref, size)
   409  			continue
   410  		}
   411  
   412  		if _, err := blobserver.Receive(firstHopDest, sb.Ref, blobReader); err != nil {
   413  			stats.ErrorCount++
   414  			log.Printf("Upload of %s to destination blobserver failed: %v", sb.Ref, err)
   415  			continue
   416  		}
   417  		stats.BlobsCopied++
   418  		stats.BytesCopied += int64(size)
   419  
   420  		if c.removeSrc {
   421  			if err = src.RemoveBlobs([]blob.Ref{sb.Ref}); err != nil {
   422  				stats.ErrorCount++
   423  				log.Printf("Failed to delete %s from source: %v", sb.Ref, err)
   424  			}
   425  		}
   426  	}
   427  
   428  	checkSourceError()
   429  	checkDestError()
   430  	checkThirdError()
   431  	if retErr == nil && stats.ErrorCount > 0 {
   432  		retErr = fmt.Errorf("%d errors during sync", stats.ErrorCount)
   433  	}
   434  	return stats, retErr
   435  }
   436  
   437  func loggingBlobRefChannel(ch <-chan blob.SizedRef) chan blob.SizedRef {
   438  	ch2 := make(chan blob.SizedRef)
   439  	go func() {
   440  		defer close(ch2)
   441  		var last time.Time
   442  		var nblob, nbyte int64
   443  		for v := range ch {
   444  			ch2 <- v
   445  			nblob++
   446  			nbyte += int64(v.Size)
   447  			now := time.Now()
   448  			if last.IsZero() || now.After(last.Add(1*time.Second)) {
   449  				last = now
   450  				log.Printf("At source blob %v (%d blobs, %d bytes)", v.Ref, nblob, nbyte)
   451  			}
   452  		}
   453  		log.Printf("Total blobs: %d, %d bytes", nblob, nbyte)
   454  	}()
   455  	return ch2
   456  }