github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/cmd/camput/files.go

     1  /*
     2  Copyright 2011 Google Inc.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package main
    18  
    19  import (
    20  	"bufio"
    21  	"crypto/sha1"
    22  	"errors"
    23  	"flag"
    24  	"fmt"
    25  	"hash"
    26  	"io"
    27  	"io/ioutil"
    28  	"log"
    29  	"net/http"
    30  	"os"
    31  	"path/filepath"
    32  	"sort"
    33  	"strconv"
    34  	"strings"
    35  	"sync"
    36  	"sync/atomic"
    37  	"time"
    38  
    39  	"camlistore.org/pkg/blob"
    40  	"camlistore.org/pkg/blobserver"
    41  	statspkg "camlistore.org/pkg/blobserver/stats"
    42  	"camlistore.org/pkg/client"
    43  	"camlistore.org/pkg/client/android"
    44  	"camlistore.org/pkg/cmdmain"
    45  	"camlistore.org/pkg/schema"
    46  )
    47  
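        // fileCmd holds the flag values for the "camput file" subcommand
        // registered in init below.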
    48  type fileCmd struct {
    49  	title string
    50  	tag   string
    51  
    52  	makePermanode     bool // make new, unique permanode of the root (dir or file)
    53  	filePermanodes    bool // make planned permanodes for each file (based on their digest)
    54  	vivify            bool
    55  	exifTime          bool // use metadata (such as in EXIF) to find the creation time of the file
    56  	capCtime          bool // cap the file's creation time at its modification time (use mtime if the ctime would be newer)
    57  	diskUsage         bool // show "du" disk usage only (dry run mode), don't actually upload
    58  	argsFromInput     bool // Android mode: filenames piped into stdin, one at a time.
    59  	deleteAfterUpload bool // with fileNodes, deletes the input file once uploaded
    60  
    61  	statcache bool
    62  
    63  	// Go into in-memory stats mode only; doesn't actually upload.
    64  	memstats bool
    65  	histo    string // optional histogram output filename
    66  }
    67  
    68  var flagUseSQLiteChildCache bool // Use sqlite for the statcache and havecache.
    69  
    70  var (
    71  	uploadWorkers    = 5 // concurrent upload workers (negative means unbounded: memory hog)
    72  	dirUploadWorkers = 3 // concurrent directory uploading workers
    73  	statCacheWorkers = 5 // concurrent statcache workers
    74  )
    75  
    76  func init() {
    77  	cmdmain.RegisterCommand("file", func(flags *flag.FlagSet) cmdmain.CommandRunner {
    78  		cmd := new(fileCmd)
    79  		flags.BoolVar(&cmd.makePermanode, "permanode", false, "Create and associate a new permanode for the uploaded file or directory.")
    80  		flags.BoolVar(&cmd.filePermanodes, "filenodes", false, "Create (if necessary) content-based permanodes for each uploaded file.")
    81  		flags.BoolVar(&cmd.deleteAfterUpload, "delete_after_upload", false, "If using -filenodes, deletes files once they're uploaded, or if they've already been uploaded.")
    82  		flags.BoolVar(&cmd.vivify, "vivify", false,
    83  			"If true, ask the server to create and sign permanode(s) associated with each uploaded"+
    84  				" file. This permits the server to have your signing key. Used mostly with untrusted"+
    85  				" or at-risk clients, such as phones.")
    86  		flags.BoolVar(&cmd.exifTime, "exiftime", false, "Try to use metadata (such as EXIF) to get a stable creation time. If found, used as the replacement for the modtime. Mainly useful with vivify or filenodes.")
    87  		flags.StringVar(&cmd.title, "title", "", "Optional title attribute to set on permanode when using -permanode.")
    88  		flags.StringVar(&cmd.tag, "tag", "", "Optional tag(s) to set on permanode when using -permanode or -filenodes. Single value or comma separated.")
    89  
    90  		flags.BoolVar(&cmd.diskUsage, "du", false, "Dry run mode: only show disk usage information, without uploading or statting the destination. Used mostly for testing skipDirs configs.")
    91  
    92  		if debug, _ := strconv.ParseBool(os.Getenv("CAMLI_DEBUG")); debug {
    93  			flags.BoolVar(&cmd.statcache, "statcache", true, "Use the stat cache, assuming unchanged files already uploaded in the past are still there. Fast, but potentially dangerous.")
    94  			flags.BoolVar(&cmd.memstats, "debug-memstats", false, "Enter debug in-memory mode; collecting stats only. Doesn't upload anything.")
    95  			flags.StringVar(&cmd.histo, "debug-histogram-file", "", "Optional file to create and write the blob size for each file uploaded.  For use with GNU R and hist(read.table(\"filename\")$V1). Requires debug-memstats.")
    96  			flags.BoolVar(&cmd.capCtime, "capctime", false, "For file blobs, use the modification time as the creation time if the creation time would otherwise be bigger (newer) than the modification time. For stable filenode creation (you can forge mtime, but can't forge ctime).")
    97  			flags.BoolVar(&flagUseSQLiteChildCache, "sqlitecache", false, "Use sqlite for the statcache and havecache instead of a flat cache.")
    98  		} else {
    99  			cmd.statcache = true
   100  		}
   101  		if android.IsChild() {
   102  			flags.BoolVar(&cmd.argsFromInput, "stdinargs", false, "If true, filenames to upload are sent one-per-line on stdin. EOF means to quit the process with exit status 0.")
   103  			// limit number of goroutines to limit memory
   104  			uploadWorkers = 2
   105  			dirUploadWorkers = 2
   106  			statCacheWorkers = 2
   107  		}
   108  		flagCacheLog = flags.Bool("logcache", false, "log caching details")
   109  
   110  		return cmd
   111  	})
   112  }
   113  
   114  func (c *fileCmd) Describe() string {
   115  	return "Upload file(s)."
   116  }
   117  
   118  func (c *fileCmd) Usage() {
   119  	fmt.Fprintf(cmdmain.Stderr, "Usage: camput [globalopts] file [fileopts] <file/director(ies)>\n")
   120  }
   121  
   122  func (c *fileCmd) Examples() []string {
   123  	return []string{
   124  		"[opts] <file(s)/director(ies)>",
   125  		"--permanode --title='Homedir backup' --tag=backup,homedir $HOME",
   126  		"--filenodes /mnt/camera/DCIM",
   127  	}
   128  }
   129  
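        // RunCommand implements cmdmain.CommandRunner. It validates the flag
        // combination, then either reports disk usage (-du), reads filenames
        // from stdin (-stdinargs), or uploads each argument, optionally
        // attaching the results to a new permanode (-permanode).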
   130  func (c *fileCmd) RunCommand(args []string) error {
   131  	if c.vivify {
   132  		if c.makePermanode || c.filePermanodes || c.tag != "" || c.title != "" {
   133  			return cmdmain.UsageError("--vivify excludes any other option")
   134  		}
   135  	}
   136  	if c.title != "" && !c.makePermanode {
   137  		return cmdmain.UsageError("Can't set title without using --permanode")
   138  	}
   139  	if c.tag != "" && !c.makePermanode && !c.filePermanodes {
   140  		return cmdmain.UsageError("Can't set tag without using --permanode or --filenodes")
   141  	}
   142  	if c.histo != "" && !c.memstats {
   143  		return cmdmain.UsageError("Can't use --debug-histogram-file without --debug-memstats")
   144  	}
   145  	if c.deleteAfterUpload && !c.filePermanodes {
   146  		return cmdmain.UsageError("Can't use --delete_after_upload without --filenodes")
   147  	}
   148  	up := getUploader()
   149  	if c.memstats {
   150  		sr := new(statspkg.Receiver)
   151  		up.altStatReceiver = sr
   152  		defer func() { DumpStats(sr, c.histo) }()
   153  	}
   154  	c.initCaches(up)
   155  
   156  	if c.makePermanode || c.filePermanodes {
   157  		testSigBlobRef := up.Client.SignerPublicKeyBlobref()
   158  		if !testSigBlobRef.Valid() {
   159  			return cmdmain.UsageError("A GPG key is needed to create permanodes; configure one or use vivify mode.")
   160  		}
   161  	}
   162  	up.fileOpts = &fileOptions{
   163  		permanode: c.filePermanodes,
   164  		tag:       c.tag,
   165  		vivify:    c.vivify,
   166  		exifTime:  c.exifTime,
   167  		capCtime:  c.capCtime,
   168  	}
   169  
   170  	var (
   171  		permaNode *client.PutResult
   172  		lastPut   *client.PutResult
   173  		err       error
   174  	)
   175  	if c.makePermanode {
   176  		if len(args) != 1 {
   177  			return fmt.Errorf("The --permanode flag can only be used with exactly one file or directory argument")
   178  		}
   179  		permaNode, err = up.UploadNewPermanode()
   180  		if err != nil {
   181  			return fmt.Errorf("Uploading permanode: %v", err)
   182  		}
   183  	}
   184  	if c.diskUsage {
   185  		if len(args) != 1 {
   186  			return fmt.Errorf("The --du flag can only be used with exactly one directory argument")
   187  		}
   188  		dir := args[0]
   189  		fi, err := up.stat(dir)
   190  		if err != nil {
   191  			return err
   192  		}
   193  		if !fi.IsDir() {
   194  			return fmt.Errorf("%q is not a directory.", dir)
   195  		}
   196  		t := up.NewTreeUpload(dir)
   197  		t.DiskUsageMode = true
   198  		t.Start()
   199  		pr, err := t.Wait()
   200  		if err != nil {
   201  			return err
   202  		}
   203  		handleResult("tree-upload", pr, err)
   204  		return nil
   205  	}
   206  	if c.argsFromInput {
   207  		if len(args) > 0 {
   208  			return errors.New("args not supported with -stdinargs")
   209  		}
   210  		tu := up.NewRootlessTreeUpload()
   211  		tu.Start()
   212  		br := bufio.NewReader(os.Stdin)
   213  		for {
   214  			path, err := br.ReadString('\n')
   215  			if path = strings.TrimSpace(path); path != "" {
   216  				tu.Enqueue(path)
   217  			}
   218  			if err == io.EOF {
   219  				android.PreExit()
   220  				os.Exit(0)
   221  			}
   222  			if err != nil {
   223  				log.Fatal(err)
   224  			}
   225  		}
   226  	}
   227  
   228  	if len(args) == 0 {
   229  		return cmdmain.UsageError("No files or directories given.")
   230  	}
   231  	if up.statCache != nil {
   232  		defer up.statCache.Close()
   233  	}
   234  	for _, filename := range args {
   235  		fi, err := os.Stat(filename)
   236  		if err != nil {
   237  			return err
   238  		}
   239  		// Skip ignored files or base directories.  Failing to skip the
   240  		// latter results in a panic.
   241  		if up.Client.IsIgnoredFile(filename) {
   242  			log.Printf("Client configured to ignore %s; skipping.", filename)
   243  			continue
   244  		}
   245  		if fi.IsDir() {
   246  			if up.fileOpts.wantVivify() {
   247  				vlog.Printf("Directories not supported in vivify mode; skipping %v\n", filename)
   248  				continue
   249  			}
   250  			if !*cmdmain.FlagVerbose {
   251  				log.SetOutput(ioutil.Discard)
   252  			}
   253  			t := up.NewTreeUpload(filename)
   254  			t.Start()
   255  			lastPut, err = t.Wait()
   256  		} else {
   257  			lastPut, err = up.UploadFile(filename)
   258  			if err == nil && c.deleteAfterUpload {
   259  				if err := os.Remove(filename); err != nil {
   260  					log.Printf("Error deleting %v: %v", filename, err)
   261  				} else {
   262  					log.Printf("Deleted %v", filename)
   263  				}
   264  			}
   265  		}
   266  		if handleResult("file", lastPut, err) != nil {
   267  			return err
   268  		}
   269  	}
   270  
   271  	if permaNode != nil && lastPut != nil {
   272  		put, err := up.UploadAndSignBlob(schema.NewSetAttributeClaim(permaNode.BlobRef, "camliContent", lastPut.BlobRef.String()))
   273  		if handleResult("claim-permanode-content", put, err) != nil {
   274  			return err
   275  		}
   276  		if c.title != "" {
   277  			put, err := up.UploadAndSignBlob(schema.NewSetAttributeClaim(permaNode.BlobRef, "title", c.title))
   278  			handleResult("claim-permanode-title", put, err)
   279  		}
   280  		if c.tag != "" {
   281  			tags := strings.Split(c.tag, ",")
   282  			for _, tag := range tags {
   283  				m := schema.NewAddAttributeClaim(permaNode.BlobRef, "tag", tag)
   284  				put, err := up.UploadAndSignBlob(m)
   285  				handleResult("claim-permanode-tag", put, err)
   286  			}
   287  		}
   288  		handleResult("permanode", permaNode, nil)
   289  	}
   290  	return nil
   291  }
   292  
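        // initCaches enables the local stat cache, keyed by the server's
        // storage generation, unless caching was disabled or flagBlobDir is set.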
   293  func (c *fileCmd) initCaches(up *Uploader) {
   294  	if !c.statcache || *flagBlobDir != "" {
   295  		return
   296  	}
   297  	gen, err := up.StorageGeneration()
   298  	if err != nil {
   299  		log.Printf("WARNING: not using local caches; failed to retrieve server's storage generation: %v", err)
   300  		return
   301  	}
   302  	if c.statcache {
   303  		up.statCache = NewKvStatCache(gen)
   304  	}
   305  }
   306  
   307  // DumpStats creates the destFile and writes a line per received blob,
   308  // with its blob size.
   309  func DumpStats(sr *statspkg.Receiver, destFile string) {
   310  	sr.Lock()
   311  	defer sr.Unlock()
   312  
   313  	f, err := os.Create(destFile)
   314  	if err != nil {
   315  		log.Fatal(err)
   316  	}
   317  
   318  	var sum int64
   319  	for _, size := range sr.Have {
   320  		fmt.Fprintf(f, "%d\n", size)
        		sum += int64(size) // accumulate total bytes for the summary printed below
   321  	}
   322  	fmt.Printf("In-memory blob stats: %d blobs, %d bytes\n", len(sr.Have), sum)
   323  
   324  	err = f.Close()
   325  	if err != nil {
   326  		log.Fatal(err)
   327  	}
   328  }
   329  
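        // stats accumulates file and byte counts for a TreeUpload
        // (total, skipped, and uploaded).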
   330  type stats struct {
   331  	files, bytes int64
   332  }
   333  
   334  func (s *stats) incr(n *node) {
   335  	s.files++
   336  	if !n.fi.IsDir() {
   337  		s.bytes += n.fi.Size()
   338  	}
   339  }
   340  
   341  func (up *Uploader) lstat(path string) (os.FileInfo, error) {
   342  	// TODO(bradfitz): use VFS
   343  	return os.Lstat(path)
   344  }
   345  
   346  func (up *Uploader) stat(path string) (os.FileInfo, error) {
   347  	if up.fs == nil {
   348  		return os.Stat(path)
   349  	}
   350  	f, err := up.fs.Open(path)
   351  	if err != nil {
   352  		return nil, err
   353  	}
   354  	defer f.Close()
   355  	return f.Stat()
   356  }
   357  
   358  func (up *Uploader) open(path string) (http.File, error) {
   359  	if up.fs == nil {
   360  		return os.Open(path)
   361  	}
   362  	return up.fs.Open(path)
   363  }
   364  
   365  func (n *node) directoryStaticSet() (*schema.StaticSet, error) {
   366  	ss := new(schema.StaticSet)
   367  	for _, c := range n.children {
   368  		pr, err := c.PutResult()
   369  		if err != nil {
   370  			return nil, fmt.Errorf("Error populating directory static set for child %q: %v", c.fullPath, err)
   371  		}
   372  		ss.Add(pr.BlobRef)
   373  	}
   374  	return ss, nil
   375  }
   376  
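        // uploadNode uploads the schema blob describing n. Regular files are
        // handled by uploadNodeRegularFile; directories first upload the static
        // set of their children; symlinks record their target. Other file types
        // (devices, sockets, FIFOs) are rejected.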
   377  func (up *Uploader) uploadNode(n *node) (*client.PutResult, error) {
   378  	fi := n.fi
   379  	mode := fi.Mode()
   380  	if mode&os.ModeType == 0 {
   381  		return up.uploadNodeRegularFile(n)
   382  	}
   383  	bb := schema.NewCommonFileMap(n.fullPath, fi)
   384  	switch {
   385  	case mode&os.ModeSymlink != 0:
   386  		// TODO(bradfitz): use VFS here; not os.Readlink
   387  		target, err := os.Readlink(n.fullPath)
   388  		if err != nil {
   389  			return nil, err
   390  		}
   391  		bb.SetSymlinkTarget(target)
   392  	case mode&os.ModeDevice != 0:
   393  		// including mode & os.ModeCharDevice
   394  		fallthrough
   395  	case mode&os.ModeSocket != 0:
   396  		fallthrough
   397  	case mode&os.ModeNamedPipe != 0: // FIFO
   398  		fallthrough
   399  	default:
   400  		return nil, fmt.Errorf("camput.files: unsupported file type %v for file %v", mode, n.fullPath)
   401  	case fi.IsDir():
   402  		ss, err := n.directoryStaticSet()
   403  		if err != nil {
   404  			return nil, err
   405  		}
   406  		sspr, err := up.UploadBlob(ss)
   407  		if err != nil {
   408  			return nil, err
   409  		}
   410  		bb.PopulateDirectoryMap(sspr.BlobRef)
   411  	}
   412  
   413  	mappr, err := up.UploadBlob(bb)
   414  	if err == nil {
   415  		if !mappr.Skipped {
   416  			vlog.Printf("Uploaded %q, %s for %s", bb.Type(), mappr.BlobRef, n.fullPath)
   417  		}
   418  	} else {
   419  		vlog.Printf("Error uploading map for %s (%s, %s): %v", n.fullPath, bb.Type(), bb.Blob().BlobRef(), err)
   420  	}
   421  	return mappr, err
   422  
   423  }
   424  
   425  // statReceiver returns the StatReceiver used for checking for and uploading blobs.
   426  //
   427  // The optional provided node is only used for conditionally printing out status info to stdout.
   428  func (up *Uploader) statReceiver(n *node) blobserver.StatReceiver {
   429  	statReceiver := up.altStatReceiver
   430  	if statReceiver == nil {
   431  		// TODO(mpl): simplify the altStatReceiver situation as well,
   432  		// see TODO in cmd/camput/uploader.go
   433  		statReceiver = up.Client
   434  	}
   435  	if android.IsChild() && n != nil && n.fi.Mode()&os.ModeType == 0 {
   436  		return android.StatusReceiver{Sr: statReceiver, Path: n.fullPath}
   437  	}
   438  	return statReceiver
   439  }
   440  
   441  func (up *Uploader) noStatReceiver(r blobserver.BlobReceiver) blobserver.StatReceiver {
   442  	return noStatReceiver{r}
   443  }
   444  
   445  // A noStatReceiver relays Receive calls to the embedded
   446  // BlobReceiver and treats all Stat calls like the blob doesn't exist.
   447  //
   448  // This is used by the client once it's already asked the server that
   449  // it doesn't have the whole file in some chunk layout already, so we
   450  // know we're just writing new stuff. For resuming in the middle of
   451  // larger uploads, it turns out that the pkg/client.Client.Upload
   452  // already checks the have cache anyway, so going right to mid-chunk
   453  // receives is fine.
   454  //
   455  // TODO(bradfitz): this probably all needs an audit/rationalization/tests
   456  // to make sure all the players are agreeing on the responsibilities.
   457  // And maybe the Android stats are wrong, too. (see pkg/client/android's
   458  // StatReceiver)
   459  type noStatReceiver struct {
   460  	blobserver.BlobReceiver
   461  }
   462  
   463  func (noStatReceiver) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) error {
   464  	return nil
   465  }
   466  
   467  var atomicDigestOps int64 // number of files digested
   468  
   469  // wholeFileDigest returns the sha1 digest of the contents of the regular
   470  // file whose absolute path is given in fullPath.
   471  func (up *Uploader) wholeFileDigest(fullPath string) (blob.Ref, error) {
   472  	// TODO(bradfitz): cache this.
   473  	file, err := up.open(fullPath)
   474  	if err != nil {
   475  		return blob.Ref{}, err
   476  	}
   477  	defer file.Close()
   478  	td := &trackDigestReader{r: file}
   479  	_, err = io.Copy(ioutil.Discard, td)
   480  	atomic.AddInt64(&atomicDigestOps, 1)
   481  	if err != nil {
   482  		return blob.Ref{}, err
   483  	}
   484  	return blob.MustParse(td.Sum()), nil
   485  }
   486  
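        // noDupSearch, settable via CAMLI_NO_FILE_DUP_SEARCH, disables the
        // server-side search for an already-uploaded copy of a file's contents.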
   487  var noDupSearch, _ = strconv.ParseBool(os.Getenv("CAMLI_NO_FILE_DUP_SEARCH"))
   488  
   489  // fileMapFromDuplicate queries the server's search interface for an
   490  // existing file whose entire contents have the digest sum (a blobref string).
   491  // If the server has it, it's validated, and then fileMap (which must
   492  // already be partially populated) has its "parts" field populated,
   493  // and then fileMap is uploaded (if necessary) and a PutResult with
   494  // its blobref is returned. If there's any problem, or a dup doesn't
   495  // exist, ok is false.
   496  // If required, Vivify is also done here.
   497  func (up *Uploader) fileMapFromDuplicate(bs blobserver.StatReceiver, fileMap *schema.Builder, sum string) (pr *client.PutResult, ok bool) {
   498  	if noDupSearch {
   499  		return
   500  	}
   501  	_, err := up.Client.SearchRoot()
   502  	if err != nil {
   503  		return
   504  	}
   505  	dupFileRef, err := up.Client.SearchExistingFileSchema(blob.MustParse(sum))
   506  	if err != nil {
   507  		log.Printf("Warning: error searching for already-uploaded copy of %s: %v", sum, err)
   508  		return nil, false
   509  	}
   510  	if !dupFileRef.Valid() {
   511  		return nil, false
   512  	}
   513  	if *cmdmain.FlagVerbose {
   514  		log.Printf("Found dup of contents %s in file schema %s", sum, dupFileRef)
   515  	}
   516  	dupMap, err := up.Client.FetchSchemaBlob(dupFileRef)
   517  	if err != nil {
   518  		log.Printf("Warning: error fetching %v: %v", dupFileRef, err)
   519  		return nil, false
   520  	}
   521  
   522  	fileMap.PopulateParts(dupMap.PartsSize(), dupMap.ByteParts())
   523  
   524  	json, err := fileMap.JSON()
   525  	if err != nil {
   526  		return nil, false
   527  	}
   528  	uh := client.NewUploadHandleFromString(json)
   529  	if up.fileOpts.wantVivify() {
   530  		uh.Vivify = true
   531  	}
   532  	if !uh.Vivify && uh.BlobRef == dupFileRef {
   533  		// Unchanged (same filename, modtime, JSON serialization, etc)
   534  		return &client.PutResult{BlobRef: dupFileRef, Size: uint32(len(json)), Skipped: true}, true
   535  	}
   536  	pr, err = up.Upload(uh)
   537  	if err != nil {
   538  		log.Printf("Warning: error uploading file map after finding server dup of %v: %v", sum, err)
   539  		return nil, false
   540  	}
   541  	return pr, true
   542  }
   543  
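        // uploadNodeRegularFile uploads the contents and the "file" schema blob
        // for the regular file node n. Files larger than dupCheckThreshold are
        // first checked against the server via fileMapFromDuplicate so their
        // bytes aren't re-sent when an identical file is already stored.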
   544  func (up *Uploader) uploadNodeRegularFile(n *node) (*client.PutResult, error) {
   545  	filebb := schema.NewCommonFileMap(n.fullPath, n.fi)
   546  	filebb.SetType("file")
   547  
   548  	up.fdGate.Start()
   549  	defer up.fdGate.Done()
   550  
   551  	file, err := up.open(n.fullPath)
   552  	if err != nil {
   553  		return nil, err
   554  	}
   555  	defer file.Close()
   556  	if up.fileOpts.exifTime {
   557  		ra, ok := file.(io.ReaderAt)
   558  		if !ok {
   559  			return nil, errors.New("Error asserting local file to io.ReaderAt")
   560  		}
   561  		modtime, err := schema.FileTime(ra)
   562  		if err != nil {
   563  			log.Printf("warning: getting time from EXIF failed for %v: %v", n.fullPath, err)
   564  		} else {
   565  			filebb.SetModTime(modtime)
   566  		}
   567  	}
   568  	if up.fileOpts.capCtime {
   569  		filebb.CapCreationTime()
   570  	}
   571  
   572  	var (
   573  		size                           = n.fi.Size()
   574  		fileContents io.Reader         = io.LimitReader(file, size)
   575  		br           blob.Ref          // of file schemaref
   576  		sum          string            // sha1 hashsum of the file to upload
   577  		pr           *client.PutResult // of the final "file" schema blob
   578  	)
   579  
   580  	const dupCheckThreshold = 256 << 10
   581  	if size > dupCheckThreshold {
   582  		sumRef, err := up.wholeFileDigest(n.fullPath)
   583  		if err == nil {
   584  			sum = sumRef.String()
   585  			ok := false
   586  			pr, ok = up.fileMapFromDuplicate(up.statReceiver(n), filebb, sum)
   587  			if ok {
   588  				br = pr.BlobRef
   589  				android.NoteFileUploaded(n.fullPath, !pr.Skipped)
   590  				if up.fileOpts.wantVivify() {
   591  					// we can return early in that case, because the other options
   592  					// are disallowed in the vivify case.
   593  					return pr, nil
   594  				}
   595  			}
   596  		}
   597  	}
   598  
   599  	if up.fileOpts.wantVivify() {
   600  		// If vivify wasn't already done in fileMapFromDuplicate.
   601  		err := schema.WriteFileChunks(up.noStatReceiver(up.statReceiver(n)), filebb, fileContents)
   602  		if err != nil {
   603  			return nil, err
   604  		}
   605  		json, err := filebb.JSON()
   606  		if err != nil {
   607  			return nil, err
   608  		}
   609  		br = blob.SHA1FromString(json)
   610  		h := &client.UploadHandle{
   611  			BlobRef:  br,
   612  			Size:     uint32(len(json)),
   613  			Contents: strings.NewReader(json),
   614  			Vivify:   true,
   615  		}
   616  		pr, err = up.Upload(h)
   617  		if err != nil {
   618  			return nil, err
   619  		}
   620  		android.NoteFileUploaded(n.fullPath, true)
   621  		return pr, nil
   622  	}
   623  
   624  	if !br.Valid() {
   625  		// br still zero means fileMapFromDuplicate did not find the file on the server,
   626  		// and the file has not just been uploaded subsequently to a vivify request.
   627  		// So we do the full file + file schema upload here.
   628  		if sum == "" && up.fileOpts.wantFilePermanode() {
   629  			fileContents = &trackDigestReader{r: fileContents}
   630  		}
   631  		br, err = schema.WriteFileMap(up.noStatReceiver(up.statReceiver(n)), filebb, fileContents)
   632  		if err != nil {
   633  			return nil, err
   634  		}
   635  	}
   636  
   637  	// The work for those planned permanodes (and the claims) is redone
   638  	// every time we get here (i.e. past the stat cache). However, they're
   639  	// caught by the have cache, so they won't be reuploaded for nothing
   640  	// at least.
   641  	if up.fileOpts.wantFilePermanode() {
   642  		if td, ok := fileContents.(*trackDigestReader); ok {
   643  			sum = td.Sum()
   644  		}
   645  		// claimTime is both the time of the "claimDate" in the
   646  		// JSON claim, as well as the date in the OpenPGP
   647  		// header.
   648  		// TODO(bradfitz): this is a little clumsy to do by hand.
   649  		// There should probably be a method on *Uploader to do this
   650  		// from an unsigned schema map. Maybe ditch the schema.Claimer
   651  		// type and just have the Uploader override the claimDate.
   652  		claimTime, ok := filebb.ModTime()
   653  		if !ok {
   654  			return nil, fmt.Errorf("couldn't get modtime for file %v", n.fullPath)
   655  		}
   656  		err = up.uploadFilePermanode(sum, br, claimTime)
   657  		if err != nil {
   658  			return nil, fmt.Errorf("Error uploading permanode for node %v: %v", n, err)
   659  		}
   660  	}
   661  
   662  	// TODO(bradfitz): faking a PutResult here to return
   663  	// is kinda gross.  should instead make a
   664  	// blobserver.Storage wrapper type (wrapping
   665  	// statReceiver) that can track some of this?  or make
   666  	// schemaWriteFileMap return it?
   667  	json, _ := filebb.JSON()
   668  	pr = &client.PutResult{BlobRef: br, Size: uint32(len(json)), Skipped: false}
   669  	return pr, nil
   670  }
   671  
   672  // uploadFilePermanode creates and uploads the planned permanode (with sum as a
   673  // fixed key) associated with the file blobref fileRef.
   674  // It also sets the optional tags for this permanode.
   675  func (up *Uploader) uploadFilePermanode(sum string, fileRef blob.Ref, claimTime time.Time) error {
   676  	// Use a fixed time value for signing; not using modtime
   677  	// so two identical files don't have different modtimes?
   678  	// TODO(bradfitz): consider this more?
   679  	permaNodeSigTime := time.Unix(0, 0)
   680  	permaNode, err := up.UploadPlannedPermanode(sum, permaNodeSigTime)
   681  	if err != nil {
   682  		return fmt.Errorf("Error uploading planned permanode: %v", err)
   683  	}
   684  	handleResult("node-permanode", permaNode, nil)
   685  
   686  	contentAttr := schema.NewSetAttributeClaim(permaNode.BlobRef, "camliContent", fileRef.String())
   687  	contentAttr.SetClaimDate(claimTime)
   688  	signer, err := up.Signer()
   689  	if err != nil {
   690  		return err
   691  	}
   692  	signed, err := contentAttr.SignAt(signer, claimTime)
   693  	if err != nil {
   694  		return fmt.Errorf("Failed to sign content claim: %v", err)
   695  	}
   696  	put, err := up.uploadString(signed)
   697  	if err != nil {
   698  		return fmt.Errorf("Error uploading permanode's attribute: %v", err)
   699  	}
   700  
   701  	handleResult("node-permanode-contentattr", put, nil)
   702  	if tags := up.fileOpts.tags(); len(tags) > 0 {
   703  		errch := make(chan error)
   704  		for _, tag := range tags {
   705  			go func(tag string) {
   706  				m := schema.NewAddAttributeClaim(permaNode.BlobRef, "tag", tag)
   707  				m.SetClaimDate(claimTime)
   708  				signed, err := m.SignAt(signer, claimTime)
   709  				if err != nil {
   710  					errch <- fmt.Errorf("Failed to sign tag claim: %v", err)
   711  					return
   712  				}
   713  				put, err := up.uploadString(signed)
   714  				if err != nil {
   715  					errch <- fmt.Errorf("Error uploading permanode's tag attribute %v: %v", tag, err)
   716  					return
   717  				}
   718  				handleResult("node-permanode-tag", put, nil)
   719  				errch <- nil
   720  			}(tag)
   721  		}
   722  
   723  		for _ = range tags {
   724  			if e := <-errch; e != nil && err == nil {
   725  				err = e
   726  			}
   727  		}
   728  		if err != nil {
   729  			return err
   730  		}
   731  	}
   732  	return nil
   733  }
   734  
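        // UploadFile uploads the single file or symlink at filename, consulting
        // and updating the stat cache when one is configured. Directories must
        // be uploaded with a TreeUpload instead.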
   735  func (up *Uploader) UploadFile(filename string) (*client.PutResult, error) {
   736  	fullPath, err := filepath.Abs(filename)
   737  	if err != nil {
   738  		return nil, err
   739  	}
   740  	fi, err := up.lstat(fullPath)
   741  	if err != nil {
   742  		return nil, err
   743  	}
   744  
   745  	if fi.IsDir() {
   746  		panic("must use UploadTree now for directories")
   747  	}
   748  	n := &node{
   749  		fullPath: fullPath,
   750  		fi:       fi,
   751  	}
   752  
   753  	withPermanode := up.fileOpts.wantFilePermanode()
   754  	if up.statCache != nil && !up.fileOpts.wantVivify() {
   755  		// Note: ignoring cache hits if wantVivify, otherwise
   756  		// a non-vivify put followed by a vivify one wouldn't
   757  		// end up doing the vivify.
   758  		if cachedRes, err := up.statCache.CachedPutResult(
   759  			up.pwd, n.fullPath, n.fi, withPermanode); err == nil {
   760  			return cachedRes, nil
   761  		}
   762  	}
   763  
   764  	pr, err := up.uploadNode(n)
   765  	if err == nil && up.statCache != nil {
   766  		up.statCache.AddCachedPutResult(
   767  			up.pwd, n.fullPath, n.fi, pr, withPermanode)
   768  	}
   769  
   770  	return pr, err
   771  }
   772  
   773  // NewTreeUpload returns a TreeUpload rooted at dir. It doesn't begin uploading
   774  // any files until a call to Start.
   775  func (up *Uploader) NewTreeUpload(dir string) *TreeUpload {
   776  	tu := up.NewRootlessTreeUpload()
   777  	tu.rootless = false
   778  	tu.base = dir
   779  	return tu
   780  }
   781  
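        // NewRootlessTreeUpload returns a TreeUpload with no base directory;
        // paths are fed to it one at a time via Enqueue (as in -stdinargs mode).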
   782  func (up *Uploader) NewRootlessTreeUpload() *TreeUpload {
   783  	return &TreeUpload{
   784  		rootless: true,
   785  		base:     "",
   786  		up:       up,
   787  		donec:    make(chan bool, 1),
   788  		errc:     make(chan error, 1),
   789  		stattedc: make(chan *node, buffered),
   790  	}
   791  }
   792  
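        // Start begins the tree upload in background goroutines; call Wait for
        // the final result.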
   793  func (t *TreeUpload) Start() {
   794  	go t.run()
   795  }
   796  
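        // node is a single file or directory discovered during an upload.
        // Its PutResult becomes available (via the PutResult method) once the
        // node has been uploaded or satisfied from the stat cache.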
   797  type node struct {
   798  	tu       *TreeUpload // nil if not doing a tree upload
   799  	fullPath string
   800  	fi       os.FileInfo
   801  	children []*node
   802  
   803  	// cond (and its &mu Lock) guard err and res.
   804  	cond sync.Cond // with L being &mu
   805  	mu   sync.Mutex
   806  	err  error
   807  	res  *client.PutResult
   808  
   809  	sumBytes int64 // cached value, if non-zero. also guarded by mu.
   810  }
   811  
   812  func (n *node) String() string {
   813  	if n == nil {
   814  		return "<nil *node>"
   815  	}
   816  	return fmt.Sprintf("[node %s, isDir=%v, nchild=%d]", n.fullPath, n.fi.IsDir(), len(n.children))
   817  }
   818  
   819  func (n *node) SetPutResult(res *client.PutResult, err error) {
   820  	n.mu.Lock()
   821  	defer n.mu.Unlock()
   822  	if res == nil && err == nil {
   823  		panic("SetPutResult called with (nil, nil)")
   824  	}
   825  	if n.res != nil || n.err != nil {
   826  		panic("SetPutResult called twice on node " + n.fullPath)
   827  	}
   828  	n.res, n.err = res, err
   829  	n.cond.Signal()
   830  }
   831  
   832  func (n *node) PutResult() (*client.PutResult, error) {
   833  	n.mu.Lock()
   834  	defer n.mu.Unlock()
   835  	for n.err == nil && n.res == nil {
   836  		n.cond.Wait()
   837  	}
   838  	return n.res, n.err
   839  }
   840  
   841  func (n *node) SumBytes() (v int64) {
   842  	n.mu.Lock()
   843  	defer n.mu.Unlock()
   844  	if n.sumBytes != 0 {
   845  		return n.sumBytes
   846  	}
   847  	for _, c := range n.children {
   848  		v += c.SumBytes()
   849  	}
   850  	if n.fi.Mode()&os.ModeType == 0 {
   851  		v += n.fi.Size()
   852  	}
   853  	n.sumBytes = v
   854  	return
   855  }
   856  
   857  /*
   858  A TreeUpload holds the state of an ongoing recursive directory tree
   859  upload.  Call Wait to get the final result.
   860  
   861  Uploading a directory tree involves several concurrent processes, each
   862  of which may involve multiple goroutines:
   863  
   864   1) one process stats all files and walks all directories as fast as possible
   865      to calculate how much total work there will be.  This goroutine also
   866      filters out directories to be skipped (caches, temp files, skipDirs, etc).
   867  
   868   2) one process works through the files that were discovered and checks
   869      the statcache to see what actually needs to be uploaded.
   870      The statcache is
   871          full path => {last os.FileInfo signature, put result from last time}
   872      and is used to avoid re-reading/digesting the file even locally,
   873      trusting that it's already on the server.
   874  
   875   3) one process uploads files & metadata.  This process checks the "havecache"
   876      to see which blobs are already on the server.  For a while, the local havecache
   877      (if configured) and the remote blobserver "stat" RPC are raced to determine
   878      if the local havecache is even faster. If not, it's not consulted. But if the
   879      latency of remote stats is high enough, checking locally is preferred.
   880  */
   881  type TreeUpload struct {
   882  	// If DiskUsageMode is set true before Start, only
   883  	// per-directory disk usage stats are output, like the "du"
   884  	// command.
   885  	DiskUsageMode bool
   886  
   887  	// Immutable:
   888  	rootless bool   // if true, "base" will be empty.
   889  	base     string // base directory
   890  	up       *Uploader
   891  	stattedc chan *node // from stat-the-world goroutine to run()
   892  
   893  	donec chan bool // closed when run() finishes
   894  	err   error
   895  	errc  chan error // with 1 buffer item
   896  
   897  	// Owned by run goroutine:
   898  	total    stats // total bytes on disk
   899  	skipped  stats // not even tried to upload (trusting stat cache)
   900  	uploaded stats // uploaded (even if server said it already had it and bytes weren't sent)
   901  
   902  	finalPutRes *client.PutResult // set after run() returns
   903  }
   904  
   905  // Enqueue starts uploading path (a file, directory, etc).
   906  func (t *TreeUpload) Enqueue(path string) {
   907  	t.statPath(path, nil)
   908  }
   909  
   910  // statPath builds and returns a node for fullPath, walking directories
        // recursively; fi is optional (will be lstatted if nil).
   911  func (t *TreeUpload) statPath(fullPath string, fi os.FileInfo) (nod *node, err error) {
   912  	defer func() {
   913  		if err == nil && nod != nil {
   914  			t.stattedc <- nod
   915  		}
   916  	}()
   917  	if t.up.Client.IsIgnoredFile(fullPath) {
   918  		return nil, nil
   919  	}
   920  	if fi == nil {
   921  		fi, err = t.up.lstat(fullPath)
   922  		if err != nil {
   923  			return nil, err
   924  		}
   925  	}
   926  	n := &node{
   927  		tu:       t,
   928  		fullPath: fullPath,
   929  		fi:       fi,
   930  	}
   931  	n.cond.L = &n.mu
   932  
   933  	if !fi.IsDir() {
   934  		return n, nil
   935  	}
   936  	f, err := t.up.open(fullPath)
   937  	if err != nil {
   938  		return nil, err
   939  	}
   940  	fis, err := f.Readdir(-1)
   941  	f.Close()
   942  	if err != nil {
   943  		return nil, err
   944  	}
   945  	sort.Sort(byTypeAndName(fis))
   946  	for _, fi := range fis {
   947  		depn, err := t.statPath(filepath.Join(fullPath, filepath.Base(fi.Name())), fi)
   948  		if err != nil {
   949  			return nil, err
   950  		}
   951  		if depn != nil {
   952  			n.children = append(n.children, depn)
   953  		}
   954  	}
   955  	return n, nil
   956  }
   957  
   958  // testHookStatCache, if non-nil, runs first in the checkStatCache worker.
   959  var testHookStatCache func(n *node, ok bool)
   960  
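        // run drives the pipeline described in the TreeUpload doc comment:
        // statted nodes flow through the stat-cache check into the upload
        // workers, while progress is logged periodically until all uploads
        // finish and the root node's PutResult is recorded.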
   961  func (t *TreeUpload) run() {
   962  	defer close(t.donec)
   963  
   964  	// Kick off scanning all files, eventually learning the root
   965  	// node (which references all its children).
   966  	var root *node // nil until received and set in loop below.
   967  	rootc := make(chan *node, 1)
   968  	if !t.rootless {
   969  		go func() {
   970  			n, err := t.statPath(t.base, nil)
   971  			if err != nil {
   972  				log.Fatalf("Error scanning files under %s: %v", t.base, err)
   973  			}
   974  			close(t.stattedc)
   975  			rootc <- n
   976  		}()
   977  	}
   978  
   979  	var lastStat, lastUpload string
   980  	dumpStats := func() {
   981  		if android.IsChild() {
   982  			printAndroidCamputStatus(t)
   983  			return
   984  		}
   985  		statStatus := ""
   986  		if root == nil {
   987  			statStatus = fmt.Sprintf("last stat: %s", lastStat)
   988  		}
   989  		blobStats := t.up.Stats()
   990  		log.Printf("FILES: Total: %+v Skipped: %+v Uploaded: %+v %s BLOBS: %s Digested: %d last upload: %s",
   991  			t.total, t.skipped, t.uploaded,
   992  			statStatus,
   993  			blobStats.String(),
   994  			atomic.LoadInt64(&atomicDigestOps),
   995  			lastUpload)
   996  	}
   997  
   998  	// Channels for stats & progress bars. These are never closed:
   999  	uploadedc := make(chan *node) // at least tried to upload; server might have had blob
  1000  	skippedc := make(chan *node)  // didn't even hit blobserver; trusted our stat cache
  1001  
  1002  	uploadsdonec := make(chan bool)
  1003  	var upload chan<- *node
  1004  	withPermanode := t.up.fileOpts.wantFilePermanode()
  1005  	if t.DiskUsageMode {
  1006  		upload = NewNodeWorker(1, func(n *node, ok bool) {
  1007  			if !ok {
  1008  				uploadsdonec <- true
  1009  				return
  1010  			}
  1011  			if n.fi.IsDir() {
  1012  				fmt.Printf("%d\t%s\n", n.SumBytes()>>10, n.fullPath)
  1013  			}
  1014  		})
  1015  	} else {
  1016  		dirUpload := NewNodeWorker(dirUploadWorkers, func(n *node, ok bool) {
  1017  			if !ok {
  1018  				log.Printf("done uploading directories - done with all uploads.")
  1019  				uploadsdonec <- true
  1020  				return
  1021  			}
  1022  			put, err := t.up.uploadNode(n)
  1023  			if err != nil {
  1024  				log.Fatalf("Error uploading %s: %v", n.fullPath, err)
  1025  			}
  1026  			n.SetPutResult(put, nil)
  1027  			uploadedc <- n
  1028  		})
  1029  
  1030  		upload = NewNodeWorker(uploadWorkers, func(n *node, ok bool) {
  1031  			if !ok {
  1032  				log.Printf("done with all uploads.")
  1033  				close(dirUpload)
  1034  				return
  1035  			}
  1036  			if n.fi.IsDir() {
  1037  				dirUpload <- n
  1038  				return
  1039  			}
  1040  			put, err := t.up.uploadNode(n)
  1041  			if err != nil {
  1042  				log.Fatalf("Error uploading %s: %v", n.fullPath, err)
  1043  			}
  1044  			n.SetPutResult(put, nil)
  1045  			if c := t.up.statCache; c != nil {
  1046  				c.AddCachedPutResult(
  1047  					t.up.pwd, n.fullPath, n.fi, put, withPermanode)
  1048  			}
  1049  			uploadedc <- n
  1050  		})
  1051  	}
  1052  
  1053  	checkStatCache := NewNodeWorker(statCacheWorkers, func(n *node, ok bool) {
  1054  		if hook := testHookStatCache; hook != nil {
  1055  			hook(n, ok)
  1056  		}
  1057  		if !ok {
  1058  			if t.up.statCache != nil {
  1059  				log.Printf("done checking stat cache")
  1060  			}
  1061  			close(upload)
  1062  			return
  1063  		}
  1064  		if t.DiskUsageMode || t.up.statCache == nil {
  1065  			upload <- n
  1066  			return
  1067  		}
  1068  		if !n.fi.IsDir() {
  1069  			cachedRes, err := t.up.statCache.CachedPutResult(
  1070  				t.up.pwd, n.fullPath, n.fi, withPermanode)
  1071  			if err == nil {
  1072  				n.SetPutResult(cachedRes, nil)
  1073  				cachelog.Printf("Cache HIT on %q -> %v", n.fullPath, cachedRes)
  1074  				android.NoteFileUploaded(n.fullPath, false)
  1075  				skippedc <- n
  1076  				return
  1077  			}
  1078  		}
  1079  		upload <- n
  1080  	})
  1081  
  1082  	ticker := time.NewTicker(500 * time.Millisecond)
  1083  	defer ticker.Stop()
  1084  
  1085  	stattedc := t.stattedc
  1086  Loop:
  1087  	for {
  1088  		select {
  1089  		case <-uploadsdonec:
  1090  			break Loop
  1091  		case n := <-rootc:
  1092  			root = n
  1093  		case n := <-uploadedc:
  1094  			t.uploaded.incr(n)
  1095  			lastUpload = n.fullPath
  1096  		case n := <-skippedc:
  1097  			t.skipped.incr(n)
  1098  		case n, ok := <-stattedc:
  1099  			if !ok {
  1100  				log.Printf("done statting:")
  1101  				dumpStats()
  1102  				close(checkStatCache)
  1103  				stattedc = nil
  1104  				continue
  1105  			}
  1106  			lastStat = n.fullPath
  1107  			t.total.incr(n)
  1108  			checkStatCache <- n
  1109  		case <-ticker.C:
  1110  			dumpStats()
  1111  		}
  1112  	}
  1113  
  1114  	log.Printf("tree upload finished. final stats:")
  1115  	dumpStats()
  1116  
  1117  	if root == nil {
  1118  		panic("unexpected nil root node")
  1119  	}
  1120  	var err error
  1121  	log.Printf("Waiting on root node %q", root.fullPath)
  1122  	t.finalPutRes, err = root.PutResult()
  1123  	log.Printf("Waited on root node %q: %v", root.fullPath, t.finalPutRes)
  1124  	if err != nil {
  1125  		t.err = err
  1126  	}
  1127  }
  1128  
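        // Wait blocks until run finishes and returns the root node's PutResult,
        // or the first error encountered.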
  1129  func (t *TreeUpload) Wait() (*client.PutResult, error) {
  1130  	<-t.donec
  1131  	// If an error is waiting and we don't otherwise have one, use it:
  1132  	if t.err == nil {
  1133  		select {
  1134  		case t.err = <-t.errc:
  1135  		default:
  1136  		}
  1137  	}
  1138  	if t.err == nil && t.finalPutRes == nil {
  1139  		panic("Nothing ever set t.finalPutRes, but no error set")
  1140  	}
  1141  	return t.finalPutRes, t.err
  1142  }
  1143  
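        // byTypeAndName sorts directory entries so regular files come before
        // subdirectories, each group ordered by name.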
  1144  type byTypeAndName []os.FileInfo
  1145  
  1146  func (s byTypeAndName) Len() int { return len(s) }
  1147  func (s byTypeAndName) Less(i, j int) bool {
  1148  	// files go before directories
  1149  	if s[i].IsDir() {
  1150  		if !s[j].IsDir() {
  1151  			return false
  1152  		}
  1153  	} else if s[j].IsDir() {
  1154  		return true
  1155  	}
  1156  	return s[i].Name() < s[j].Name()
  1157  }
  1158  func (s byTypeAndName) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
  1159  
  1160  // trackDigestReader is an io.Reader wrapper which records the digest of what it reads.
  1161  type trackDigestReader struct {
  1162  	r io.Reader
  1163  	h hash.Hash
  1164  }
  1165  
  1166  func (t *trackDigestReader) Read(p []byte) (n int, err error) {
  1167  	if t.h == nil {
  1168  		t.h = sha1.New()
  1169  	}
  1170  	n, err = t.r.Read(p)
  1171  	t.h.Write(p[:n])
  1172  	return
  1173  }
  1174  
  1175  func (t *trackDigestReader) Sum() string {
  1176  	return fmt.Sprintf("sha1-%x", t.h.Sum(nil))
  1177  }